aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/lib/Target/PowerPC
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp1658
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/Makefile16
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp348
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp365
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h63
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp245
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp432
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp82
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h37
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp322
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp133
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h96
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp307
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h75
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp389
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp86
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h68
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.h105
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td344
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp1397
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp211
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp663
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td199
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp293
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp2282
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp1604
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h126
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp433
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h102
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp2205
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp9290
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h717
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td1137
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td939
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h43
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td1300
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp2246
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h235
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td3424
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td816
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp482
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h46
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp216
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h191
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h6586
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp1007
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h113
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td314
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRelocations.h56
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td520
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td599
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td169
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td314
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td374
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td80
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td96
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td112
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td129
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td385
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp277
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h251
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp170
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h93
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp63
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h35
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h27
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp420
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp26
74 files changed, 48142 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
new file mode 100644
index 000000000000..d7066d58709a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -0,0 +1,1658 @@
+//===-- PPCAsmParser.cpp - Parse PowerPC asm to MCInst instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetAsmParser.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+static unsigned RRegs[32] = {
+ PPC::R0, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static unsigned RRegsNoR0[32] = {
+ PPC::ZERO,
+ PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+static unsigned XRegs[32] = {
+ PPC::X0, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static unsigned XRegsNoX0[32] = {
+ PPC::ZERO8,
+ PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+static unsigned FRegs[32] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+static unsigned VRegs[32] = {
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+static unsigned VSRegs[64] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+static unsigned VSFRegs[64] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+static unsigned CRBITRegs[32] = {
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+static unsigned CRRegs[8] = {
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+// Evaluate an expression containing condition register
+// or condition register field symbols. Returns positive
+// value on success, or -1 on error.
+static int64_t
+EvaluateCRExpr(const MCExpr *E) {
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ return -1;
+
+ case MCExpr::Constant: {
+ int64_t Res = cast<MCConstantExpr>(E)->getValue();
+ return Res < 0 ? -1 : Res;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+ StringRef Name = SRE->getSymbol().getName();
+
+ if (Name == "lt") return 0;
+ if (Name == "gt") return 1;
+ if (Name == "eq") return 2;
+ if (Name == "so") return 3;
+ if (Name == "un") return 3;
+
+ if (Name == "cr0") return 0;
+ if (Name == "cr1") return 1;
+ if (Name == "cr2") return 2;
+ if (Name == "cr3") return 3;
+ if (Name == "cr4") return 4;
+ if (Name == "cr5") return 5;
+ if (Name == "cr6") return 6;
+ if (Name == "cr7") return 7;
+
+ return -1;
+ }
+
+ case MCExpr::Unary:
+ return -1;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ int64_t LHSVal = EvaluateCRExpr(BE->getLHS());
+ int64_t RHSVal = EvaluateCRExpr(BE->getRHS());
+ int64_t Res;
+
+ if (LHSVal < 0 || RHSVal < 0)
+ return -1;
+
+ switch (BE->getOpcode()) {
+ default: return -1;
+ case MCBinaryExpr::Add: Res = LHSVal + RHSVal; break;
+ case MCBinaryExpr::Mul: Res = LHSVal * RHSVal; break;
+ }
+
+ return Res < 0 ? -1 : Res;
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+struct PPCOperand;
+
+class PPCAsmParser : public MCTargetAsmParser {
+ MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ const MCInstrInfo &MII;
+ bool IsPPC64;
+ bool IsDarwin;
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+
+ bool isPPC64() const { return IsPPC64; }
+ bool isDarwin() const { return IsDarwin; }
+
+ bool MatchRegisterName(const AsmToken &Tok,
+ unsigned &RegNo, int64_t &IntVal);
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+
+ const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
+ PPCMCExpr::VariantKind &Variant);
+ const MCExpr *FixupVariantKind(const MCExpr *E);
+ bool ParseExpression(const MCExpr *&EVal);
+ bool ParseDarwinExpression(const MCExpr *&EVal);
+
+ bool ParseOperand(OperandVector &Operands);
+
+ bool ParseDirectiveWord(unsigned Size, SMLoc L);
+ bool ParseDirectiveTC(unsigned Size, SMLoc L);
+ bool ParseDirectiveMachine(SMLoc L);
+ bool ParseDarwinDirectiveMachine(SMLoc L);
+ bool ParseDirectiveAbiVersion(SMLoc L);
+ bool ParseDirectiveLocalEntry(SMLoc L);
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ void ProcessInstruction(MCInst &Inst, const OperandVector &Ops);
+
+ /// @name Auto-generated Match Functions
+ /// {
+
+#define GET_ASSEMBLER_HEADER
+#include "PPCGenAsmMatcher.inc"
+
+ /// }
+
+
+public:
+ PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &_MII,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
+ // Check for 64-bit vs. 32-bit pointer mode.
+ Triple TheTriple(STI.getTargetTriple());
+ IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
+ IsDarwin = TheTriple.isMacOSX();
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken DirectiveID) override;
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ const MCExpr *applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind,
+ MCContext &Ctx) override;
+};
+
+/// PPCOperand - Instances of this class represent a parsed PowerPC machine
+/// instruction.
+struct PPCOperand : public MCParsedAsmOperand {
+ enum KindTy {
+ Token,
+ Immediate,
+ Expression,
+ TLSRegister
+ } Kind;
+
+ SMLoc StartLoc, EndLoc;
+ bool IsPPC64;
+
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ };
+
+ struct ImmOp {
+ int64_t Val;
+ };
+
+ struct ExprOp {
+ const MCExpr *Val;
+ int64_t CRVal; // Cached result of EvaluateCRExpr(Val)
+ };
+
+ struct TLSRegOp {
+ const MCSymbolRefExpr *Sym;
+ };
+
+ union {
+ struct TokOp Tok;
+ struct ImmOp Imm;
+ struct ExprOp Expr;
+ struct TLSRegOp TLSReg;
+ };
+
+ PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+public:
+ PPCOperand(const PPCOperand &o) : MCParsedAsmOperand() {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ IsPPC64 = o.IsPPC64;
+ switch (Kind) {
+ case Token:
+ Tok = o.Tok;
+ break;
+ case Immediate:
+ Imm = o.Imm;
+ break;
+ case Expression:
+ Expr = o.Expr;
+ break;
+ case TLSRegister:
+ TLSReg = o.TLSReg;
+ break;
+ }
+ }
+
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const override { return StartLoc; }
+
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const override { return EndLoc; }
+
+ /// isPPC64 - True if this operand is for an instruction in 64-bit mode.
+ bool isPPC64() const { return IsPPC64; }
+
+ int64_t getImm() const {
+ assert(Kind == Immediate && "Invalid access!");
+ return Imm.Val;
+ }
+
+ const MCExpr *getExpr() const {
+ assert(Kind == Expression && "Invalid access!");
+ return Expr.Val;
+ }
+
+ int64_t getExprCRVal() const {
+ assert(Kind == Expression && "Invalid access!");
+ return Expr.CRVal;
+ }
+
+ const MCExpr *getTLSReg() const {
+ assert(Kind == TLSRegister && "Invalid access!");
+ return TLSReg.Sym;
+ }
+
+ unsigned getReg() const override {
+ assert(isRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getVSReg() const {
+ assert(isVSRegNumber() && "Invalid access!");
+ return (unsigned) Imm.Val;
+ }
+
+ unsigned getCCReg() const {
+ assert(isCCRegNumber() && "Invalid access!");
+ return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
+ }
+
+ unsigned getCRBit() const {
+ assert(isCRBitNumber() && "Invalid access!");
+ return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
+ }
+
+ unsigned getCRBitMask() const {
+ assert(isCRBitMask() && "Invalid access!");
+ return 7 - countTrailingZeros<uint64_t>(Imm.Val);
+ }
+
+ bool isToken() const override { return Kind == Token; }
+ bool isImm() const override { return Kind == Immediate || Kind == Expression; }
+ bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
+ bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
+ bool isU6Imm() const { return Kind == Immediate && isUInt<6>(getImm()); }
+ bool isU16Imm() const { return Kind == Expression ||
+ (Kind == Immediate && isUInt<16>(getImm())); }
+ bool isS16Imm() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm())); }
+ bool isS16ImmX4() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm()) &&
+ (getImm() & 3) == 0); }
+ bool isS17Imm() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<17>(getImm())); }
+ bool isTLSReg() const { return Kind == TLSRegister; }
+ bool isDirectBr() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<26>(getImm()) &&
+ (getImm() & 3) == 0); }
+ bool isCondBr() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm()) &&
+ (getImm() & 3) == 0); }
+ bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
+ bool isVSRegNumber() const { return Kind == Immediate && isUInt<6>(getImm()); }
+ bool isCCRegNumber() const { return (Kind == Expression
+ && isUInt<3>(getExprCRVal())) ||
+ (Kind == Immediate
+ && isUInt<3>(getImm())); }
+ bool isCRBitNumber() const { return (Kind == Expression
+ && isUInt<5>(getExprCRVal())) ||
+ (Kind == Immediate
+ && isUInt<5>(getImm())); }
+ bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
+ isPowerOf2_32(getImm()); }
+ bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
+
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ llvm_unreachable("addRegOperands");
+ }
+
+ void addRegGPRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(RRegs[getReg()]));
+ }
+
+ void addRegGPRCNoR0Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(RRegsNoR0[getReg()]));
+ }
+
+ void addRegG8RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(XRegs[getReg()]));
+ }
+
+ void addRegG8RCNoX0Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(XRegsNoX0[getReg()]));
+ }
+
+ void addRegGxRCOperands(MCInst &Inst, unsigned N) const {
+ if (isPPC64())
+ addRegG8RCOperands(Inst, N);
+ else
+ addRegGPRCOperands(Inst, N);
+ }
+
+ void addRegGxRCNoR0Operands(MCInst &Inst, unsigned N) const {
+ if (isPPC64())
+ addRegG8RCNoX0Operands(Inst, N);
+ else
+ addRegGPRCNoR0Operands(Inst, N);
+ }
+
+ void addRegF4RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ }
+
+ void addRegF8RCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(FRegs[getReg()]));
+ }
+
+ void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VRegs[getReg()]));
+ }
+
+ void addRegVSRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSRegs[getVSReg()]));
+ }
+
+ void addRegVSFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(VSFRegs[getVSReg()]));
+ }
+
+ void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
+ }
+
+ void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(CRRegs[getCCReg()]));
+ }
+
+ void addCRBitMaskOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(CRRegs[getCRBitMask()]));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Immediate)
+ Inst.addOperand(MCOperand::CreateImm(getImm()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ }
+
+ void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ if (Kind == Immediate)
+ Inst.addOperand(MCOperand::CreateImm(getImm() / 4));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ }
+
+ void addTLSRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateExpr(getTLSReg()));
+ }
+
+ StringRef getToken() const {
+ assert(Kind == Token && "Invalid access!");
+ return StringRef(Tok.Data, Tok.Length);
+ }
+
+ void print(raw_ostream &OS) const override;
+
+ static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Token);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
+ CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) {
+ // Allocate extra memory for the string and copy it.
+ // FIXME: This is incorrect, Operands are owned by unique_ptr with a default
+ // deleter which will destroy them by simply using "delete", not correctly
+ // calling operator delete on this extra memory after calling the dtor
+ // explicitly.
+ void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
+ std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token));
+ Op->Tok.Data = (const char *)(Op.get() + 1);
+ Op->Tok.Length = Str.size();
+ std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size());
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E,
+ bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Immediate);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S,
+ SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(Expression);
+ Op->Expr.Val = Val;
+ Op->Expr.CRVal = EvaluateCRExpr(Val);
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
+ CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) {
+ auto Op = make_unique<PPCOperand>(TLSRegister);
+ Op->TLSReg.Sym = Sym;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static std::unique_ptr<PPCOperand>
+ CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) {
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
+ return CreateImm(CE->getValue(), S, E, IsPPC64);
+
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Val))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
+ return CreateTLSReg(SRE, S, E, IsPPC64);
+
+ return CreateExpr(Val, S, E, IsPPC64);
+ }
+};
+
+} // end anonymous namespace.
+
+void PPCOperand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case Immediate:
+ OS << getImm();
+ break;
+ case Expression:
+ getExpr()->print(OS);
+ break;
+ case TLSRegister:
+ getTLSReg()->print(OS);
+ break;
+ }
+}
+
+void PPCAsmParser::ProcessInstruction(MCInst &Inst,
+ const OperandVector &Operands) {
+ int Opcode = Inst.getOpcode();
+ switch (Opcode) {
+ case PPC::LAx: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::LA);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBI: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::ADDI);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBIS: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::ADDIS);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBIC: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::ADDIC);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBICo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::ADDICo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTLWI:
+ case PPC::EXTLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTRWI:
+ case PPC::EXTRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B + N));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSLWI:
+ case PPC::INSLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSLWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - B));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSRWI:
+ case PPC::INSRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSRWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - (B + N)));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::ROTRWI:
+ case PPC::ROTRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SLWI:
+ case PPC::SLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SRWI:
+ case PPC::SRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRRWI:
+ case PPC::CLRRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRLSLWI:
+ case PPC::CLRLSLWIo: {
+ MCInst TmpInst;
+ int64_t B = Inst.getOperand(2).getImm();
+ int64_t N = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(B - N));
+ TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTLDI:
+ case PPC::EXTLDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTRDI:
+ case PPC::EXTRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B + N));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSRDI:
+ case PPC::INSRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSRDI? PPC::RLDIMI : PPC::RLDIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - (B + N)));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::ROTRDI:
+ case PPC::ROTRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SLDI:
+ case PPC::SLDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SRDI:
+ case PPC::SRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRRDI:
+ case PPC::CLRRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRLSLDI:
+ case PPC::CLRLSLDIo: {
+ MCInst TmpInst;
+ int64_t B = Inst.getOperand(2).getImm();
+ int64_t N = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(B - N));
+ Inst = TmpInst;
+ break;
+ }
+ }
+}
+
+bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+
+ switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
+ default: break;
+ case Match_Success:
+ // Post-process instructions (typically extended mnemonics)
+ ProcessInstruction(Inst, Operands);
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction use requires an option to be enabled");
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ }
+
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+}
+
+bool PPCAsmParser::
+MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef Name = Tok.getString();
+
+ if (Name.equals_lower("lr")) {
+ RegNo = isPPC64()? PPC::LR8 : PPC::LR;
+ IntVal = 8;
+ return false;
+ } else if (Name.equals_lower("ctr")) {
+ RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
+ IntVal = 9;
+ return false;
+ } else if (Name.equals_lower("vrsave")) {
+ RegNo = PPC::VRSAVE;
+ IntVal = 256;
+ return false;
+ } else if (Name.startswith_lower("r") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("f") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = FRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("v") &&
+ !Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
+ RegNo = VRegs[IntVal];
+ return false;
+ } else if (Name.startswith_lower("cr") &&
+ !Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
+ RegNo = CRRegs[IntVal];
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool PPCAsmParser::
+ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
+ const AsmToken &Tok = Parser.getTok();
+ StartLoc = Tok.getLoc();
+ EndLoc = Tok.getEndLoc();
+ RegNo = 0;
+ int64_t IntVal;
+
+ if (!MatchRegisterName(Tok, RegNo, IntVal)) {
+ Parser.Lex(); // Eat identifier token.
+ return false;
+ }
+
+ return Error(StartLoc, "invalid register name");
+}
+
+/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan
+/// the expression and check for VK_PPC_LO/HI/HA
+/// symbol variants. If all symbols with modifier use the same
+/// variant, return the corresponding PPCMCExpr::VariantKind,
+/// and a modified expression using the default symbol variant.
+/// Otherwise, return NULL.
+const MCExpr *PPCAsmParser::
+ExtractModifierFromExpr(const MCExpr *E,
+ PPCMCExpr::VariantKind &Variant) {
+ MCContext &Context = getParser().getContext();
+ Variant = PPCMCExpr::VK_PPC_None;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return nullptr;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ switch (SRE->getKind()) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Variant = PPCMCExpr::VK_PPC_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Variant = PPCMCExpr::VK_PPC_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Variant = PPCMCExpr::VK_PPC_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ Variant = PPCMCExpr::VK_PPC_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ Variant = PPCMCExpr::VK_PPC_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ Variant = PPCMCExpr::VK_PPC_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ Variant = PPCMCExpr::VK_PPC_HIGHESTA;
+ break;
+ default:
+ return nullptr;
+ }
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context);
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return nullptr;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ PPCMCExpr::VariantKind LHSVariant, RHSVariant;
+ const MCExpr *LHS = ExtractModifierFromExpr(BE->getLHS(), LHSVariant);
+ const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant);
+
+ if (!LHS && !RHS)
+ return nullptr;
+
+ if (!LHS) LHS = BE->getLHS();
+ if (!RHS) RHS = BE->getRHS();
+
+ if (LHSVariant == PPCMCExpr::VK_PPC_None)
+ Variant = RHSVariant;
+ else if (RHSVariant == PPCMCExpr::VK_PPC_None)
+ Variant = LHSVariant;
+ else if (LHSVariant == RHSVariant)
+ Variant = LHSVariant;
+ else
+ return nullptr;
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// Find all VK_TLSGD/VK_TLSLD symbol references in expression and replace
+/// them by VK_PPC_TLSGD/VK_PPC_TLSLD. This is necessary to avoid having
+/// _GLOBAL_OFFSET_TABLE_ created via ELFObjectWriter::RelocNeedsGOT.
+/// FIXME: This is a hack.
+const MCExpr *PPCAsmParser::
+FixupVariantKind(const MCExpr *E) {
+ MCContext &Context = getParser().getContext();
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return E;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+
+ switch (SRE->getKind()) {
+ case MCSymbolRefExpr::VK_TLSGD:
+ Variant = MCSymbolRefExpr::VK_PPC_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Variant = MCSymbolRefExpr::VK_PPC_TLSLD;
+ break;
+ default:
+ return E;
+ }
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, Context);
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = FixupVariantKind(UE->getSubExpr());
+ if (Sub == UE->getSubExpr())
+ return E;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = FixupVariantKind(BE->getLHS());
+ const MCExpr *RHS = FixupVariantKind(BE->getRHS());
+ if (LHS == BE->getLHS() && RHS == BE->getRHS())
+ return E;
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// ParseExpression. This differs from the default "parseExpression" in that
+/// it handles modifiers.
+bool PPCAsmParser::
+ParseExpression(const MCExpr *&EVal) {
+
+ if (isDarwin())
+ return ParseDarwinExpression(EVal);
+
+ // (ELF Platforms)
+ // Handle \code @l/@ha \endcode
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ EVal = FixupVariantKind(EVal);
+
+ PPCMCExpr::VariantKind Variant;
+ const MCExpr *E = ExtractModifierFromExpr(EVal, Variant);
+ if (E)
+ EVal = PPCMCExpr::Create(Variant, E, false, getParser().getContext());
+
+ return false;
+}
+
+/// ParseDarwinExpression. (MachO Platforms)
+/// This differs from the default "parseExpression" in that it handles detection
+/// of the \code hi16(), ha16() and lo16() \endcode modifiers. At present,
+/// parseExpression() doesn't recognise the modifiers when in the Darwin/MachO
+/// syntax form so it is done here. TODO: Determine if there is merit in arranging
+/// for this to be done at a higher level.
+bool PPCAsmParser::
+ParseDarwinExpression(const MCExpr *&EVal) {
+ PPCMCExpr::VariantKind Variant = PPCMCExpr::VK_PPC_None;
+ switch (getLexer().getKind()) {
+ default:
+ break;
+ case AsmToken::Identifier:
+ // Compiler-generated Darwin identifiers begin with L,l,_ or "; thus
+ // something starting with any other char should be part of the
+ // asm syntax. If handwritten asm includes an identifier like lo16,
+ // then all bets are off - but no-one would do that, right?
+ StringRef poss = Parser.getTok().getString();
+ if (poss.equals_lower("lo16")) {
+ Variant = PPCMCExpr::VK_PPC_LO;
+ } else if (poss.equals_lower("hi16")) {
+ Variant = PPCMCExpr::VK_PPC_HI;
+ } else if (poss.equals_lower("ha16")) {
+ Variant = PPCMCExpr::VK_PPC_HA;
+ }
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ Parser.Lex(); // Eat the xx16
+ if (getLexer().isNot(AsmToken::LParen))
+ return Error(Parser.getTok().getLoc(), "expected '('");
+ Parser.Lex(); // Eat the '('
+ }
+ break;
+ }
+
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ if (Variant != PPCMCExpr::VK_PPC_None) {
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "expected ')'");
+ Parser.Lex(); // Eat the ')'
+ EVal = PPCMCExpr::Create(Variant, EVal, false, getParser().getContext());
+ }
+ return false;
+}
+
+/// ParseOperand
+/// This handles registers in the form 'NN', '%rNN' for ELF platforms and
+/// rNN for MachO.
+bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ const MCExpr *EVal;
+
+ // Attempt to parse the next token as an immediate
+ switch (getLexer().getKind()) {
+ // Special handling for register names. These are interpreted
+ // as immediates corresponding to the register number.
+ case AsmToken::Percent:
+ Parser.Lex(); // Eat the '%'.
+ unsigned RegNo;
+ int64_t IntVal;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ return false;
+ }
+ return Error(S, "invalid register name");
+
+ case AsmToken::Identifier:
+ // Note that non-register-name identifiers from the compiler will begin
+ // with '_', 'L'/'l' or '"'. Of course, handwritten asm could include
+ // identifiers like r31foo - so we fall through in the event that parsing
+ // a register name fails.
+ if (isDarwin()) {
+ unsigned RegNo;
+ int64_t IntVal;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ return false;
+ }
+ }
+ // Fall-through to process non-register-name identifiers as expression.
+ // All other expressions
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ case AsmToken::Dollar:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
+ if (!ParseExpression(EVal))
+ break;
+ /* fall through */
+ default:
+ return Error(S, "unknown operand");
+ }
+
+ // Push the parsed operand into the list of operands
+ Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()));
+
+ // Check whether this is a TLS call expression
+ bool TLSCall = false;
+ if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(EVal))
+ TLSCall = Ref->getSymbol().getName() == "__tls_get_addr";
+
+ if (TLSCall && getLexer().is(AsmToken::LParen)) {
+ const MCExpr *TLSSym;
+
+ Parser.Lex(); // Eat the '('.
+ S = Parser.getTok().getLoc();
+ if (ParseExpression(TLSSym))
+ return Error(S, "invalid TLS call expression");
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "missing ')'");
+ E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()));
+ }
+
+ // Otherwise, check for D-form memory operands
+ if (!TLSCall && getLexer().is(AsmToken::LParen)) {
+ Parser.Lex(); // Eat the '('.
+ S = Parser.getTok().getLoc();
+
+ int64_t IntVal;
+ switch (getLexer().getKind()) {
+ case AsmToken::Percent:
+ Parser.Lex(); // Eat the '%'.
+ unsigned RegNo;
+ if (MatchRegisterName(Parser.getTok(), RegNo, IntVal))
+ return Error(S, "invalid register name");
+ Parser.Lex(); // Eat the identifier token.
+ break;
+
+ case AsmToken::Integer:
+ if (!isDarwin()) {
+ if (getParser().parseAbsoluteExpression(IntVal) ||
+ IntVal < 0 || IntVal > 31)
+ return Error(S, "invalid register number");
+ } else {
+ return Error(S, "unexpected integer value");
+ }
+ break;
+
+ case AsmToken::Identifier:
+ if (isDarwin()) {
+ unsigned RegNo;
+ if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
+ Parser.Lex(); // Eat the identifier token.
+ break;
+ }
+ }
+ // Fall-through..
+
+ default:
+ return Error(S, "invalid memory operand");
+ }
+
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "missing ')'");
+ E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ }
+
+ return false;
+}
+
+/// Parse an instruction mnemonic followed by its operands.
+bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) {
+ // The first operand is the token for the instruction name.
+ // If the next character is a '+' or '-', we need to add it to the
+ // instruction name, to match what TableGen is doing.
+ std::string NewOpcode;
+ if (getLexer().is(AsmToken::Plus)) {
+ getLexer().Lex();
+ NewOpcode = Name;
+ NewOpcode += '+';
+ Name = NewOpcode;
+ }
+ if (getLexer().is(AsmToken::Minus)) {
+ getLexer().Lex();
+ NewOpcode = Name;
+ NewOpcode += '-';
+ Name = NewOpcode;
+ }
+ // If the instruction ends in a '.', we need to create a separate
+ // token for it, to match what TableGen is doing.
+ size_t Dot = Name.find('.');
+ StringRef Mnemonic = Name.slice(0, Dot);
+ if (!NewOpcode.empty()) // Underlying memory for Name is volatile.
+ Operands.push_back(
+ PPCOperand::CreateTokenWithStringCopy(Mnemonic, NameLoc, isPPC64()));
+ else
+ Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64()));
+ if (Dot != StringRef::npos) {
+ SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot);
+ StringRef DotStr = Name.slice(Dot, StringRef::npos);
+ if (!NewOpcode.empty()) // Underlying memory for Name is volatile.
+ Operands.push_back(
+ PPCOperand::CreateTokenWithStringCopy(DotStr, DotLoc, isPPC64()));
+ else
+ Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64()));
+ }
+
+ // If there are no more operands then finish
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return false;
+
+ // Parse the first operand
+ if (ParseOperand(Operands))
+ return true;
+
+ while (getLexer().isNot(AsmToken::EndOfStatement) &&
+ getLexer().is(AsmToken::Comma)) {
+ // Consume the comma token
+ getLexer().Lex();
+
+ // Parse the next operand
+ if (ParseOperand(Operands))
+ return true;
+ }
+
+ return false;
+}
+
+/// ParseDirective parses the PPC specific directives
+bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+ if (!isDarwin()) {
+ if (IDVal == ".word")
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ if (IDVal == ".llong")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
+ if (IDVal == ".tc")
+ return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+ if (IDVal == ".machine")
+ return ParseDirectiveMachine(DirectiveID.getLoc());
+ if (IDVal == ".abiversion")
+ return ParseDirectiveAbiVersion(DirectiveID.getLoc());
+ if (IDVal == ".localentry")
+ return ParseDirectiveLocalEntry(DirectiveID.getLoc());
+ } else {
+ if (IDVal == ".machine")
+ return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
+ }
+ return true;
+}
+
+/// ParseDirectiveWord
+/// ::= .word [ expression (, expression)* ]
+bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ for (;;) {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return false;
+
+ getParser().getStreamer().EmitValue(Value, Size);
+
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
+ Parser.Lex();
+ }
+ }
+
+ Parser.Lex();
+ return false;
+}
+
+/// ParseDirectiveTC
+/// ::= .tc [ symbol (, expression)* ]
+bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+ // Skip TC symbol, which is only used with XCOFF.
+ while (getLexer().isNot(AsmToken::EndOfStatement)
+ && getLexer().isNot(AsmToken::Comma))
+ Parser.Lex();
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Parser.Lex();
+
+ // Align to word size.
+ getParser().getStreamer().EmitValueToAlignment(Size);
+
+ // Emit expressions.
+ return ParseDirectiveWord(Size, L);
+}
+
+/// ParseDirectiveMachine (ELF platforms)
+/// ::= .machine [ cpu | "push" | "pop" ]
+bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+
+ // FIXME: Right now, the parser always allows any available
+ // instruction, so the .machine directive is not useful.
+ // Implement ".machine any" (by doing nothing) for the benefit
+ // of existing assembler code. Likewise, we can then implement
+ // ".machine push" and ".machine pop" as no-op.
+ if (CPU != "any" && CPU != "push" && CPU != "pop") {
+ Error(L, "unrecognized machine type");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitMachine(CPU);
+
+ return false;
+}
+
+/// ParseDarwinDirectiveMachine (Mach-o platforms)
+/// ::= .machine cpu-identifier
+bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::String)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+
+ // FIXME: this is only the 'default' set of cpu variants.
+ // However we don't act on this information at present, this is simply
+ // allowing parsing to proceed with minimal sanity checking.
+ if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") {
+ Error(L, "unrecognized cpu type");
+ return false;
+ }
+
+ if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) {
+ Error(L, "wrong cpu type specified for 64bit");
+ return false;
+ }
+ if (!isPPC64() && CPU == "ppc64") {
+ Error(L, "wrong cpu type specified for 32bit");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ return false;
+}
+
+/// ParseDirectiveAbiVersion
+/// ::= .abiversion constant-expression
+bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
+ int64_t AbiVersion;
+ if (getParser().parseAbsoluteExpression(AbiVersion)){
+ Error(L, "expected constant expression");
+ return false;
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitAbiVersion(AbiVersion);
+
+ return false;
+}
+
+/// ParseDirectiveLocalEntry
+/// ::= .localentry symbol, expression
+bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name)) {
+ Error(L, "expected identifier in directive");
+ return false;
+ }
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+ Lex();
+
+ const MCExpr *Expr;
+ if (getParser().parseExpression(Expr)) {
+ Error(L, "expected expression");
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Error(L, "unexpected token in directive");
+ return false;
+ }
+
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(
+ getParser().getStreamer().getTargetStreamer());
+ TStreamer.emitLocalEntry(Sym, Expr);
+
+ return false;
+}
+
+
+
+/// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmParser() {
+ RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
+ RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target);
+ RegisterMCAsmParser<PPCAsmParser> C(ThePPC64LETarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "PPCGenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned Kind) {
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
+ int64_t ImmVal;
+ switch (Kind) {
+ case MCK_0: ImmVal = 0; break;
+ case MCK_1: ImmVal = 1; break;
+ case MCK_2: ImmVal = 2; break;
+ case MCK_3: ImmVal = 3; break;
+ default: return Match_InvalidOperand;
+ }
+
+ PPCOperand &Op = static_cast<PPCOperand &>(AsmOp);
+ if (Op.isImm() && Op.getImm() == ImmVal)
+ return Match_Success;
+
+ return Match_InvalidOperand;
+}
+
+const MCExpr *
+PPCAsmParser::applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant,
+ MCContext &Ctx) {
+ switch (Variant) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HI:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ default:
+ return nullptr;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt b/contrib/llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..ca457df88d3e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMPowerPCDisassembler
+ PPCDisassembler.cpp
+ )
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/contrib/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..b0978c227ae9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===-- ./lib/Target/PowerPC/Disassembler/LLVMBuild.txt ---------*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = PowerPCDisassembler
+parent = PowerPC
+required_libraries = MC PowerPCInfo Support
+add_to_library_groups = PowerPC
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/Makefile b/contrib/llvm/lib/Target/PowerPC/Disassembler/Makefile
new file mode 100644
index 000000000000..86e3b4752207
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===-- lib/Target/PowerPC/Disassembler/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCDisassembler
+
+# Hack: we need to include 'main' PPC target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
new file mode 100644
index 000000000000..a2305a9efc71
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -0,0 +1,348 @@
+//===------ PPCDisassembler.cpp - Disassembler for PowerPC ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+class PPCDisassembler : public MCDisassembler {
+public:
+ PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+ virtual ~PPCDisassembler() {}
+
+ // Override MCDisassembler.
+ virtual DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const override;
+};
+} // end anonymous namespace
+
+static MCDisassembler *createPPCDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCDisassembler(STI, Ctx);
+}
+
+extern "C" void LLVMInitializePowerPCDisassembler() {
+ // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(ThePPC32Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64Target,
+ createPPCDisassembler);
+ TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget,
+ createPPCDisassembler);
+}
+
+// FIXME: These can be generated by TableGen from the existing register
+// encoding values!
+
+static const unsigned CRRegs[] = {
+ PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
+ PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
+};
+
+static const unsigned CRBITRegs[] = {
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN,
+ PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN,
+ PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN,
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN,
+ PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN,
+ PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN,
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN,
+ PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN
+};
+
+static const unsigned FRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31
+};
+
+static const unsigned VRegs[] = {
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+static const unsigned VSRegs[] = {
+ PPC::VSL0, PPC::VSL1, PPC::VSL2, PPC::VSL3,
+ PPC::VSL4, PPC::VSL5, PPC::VSL6, PPC::VSL7,
+ PPC::VSL8, PPC::VSL9, PPC::VSL10, PPC::VSL11,
+ PPC::VSL12, PPC::VSL13, PPC::VSL14, PPC::VSL15,
+ PPC::VSL16, PPC::VSL17, PPC::VSL18, PPC::VSL19,
+ PPC::VSL20, PPC::VSL21, PPC::VSL22, PPC::VSL23,
+ PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
+ PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
+
+ PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
+ PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
+ PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
+ PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
+ PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
+ PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
+ PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
+ PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+};
+
+static const unsigned VSFRegs[] = {
+ PPC::F0, PPC::F1, PPC::F2, PPC::F3,
+ PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11,
+ PPC::F12, PPC::F13, PPC::F14, PPC::F15,
+ PPC::F16, PPC::F17, PPC::F18, PPC::F19,
+ PPC::F20, PPC::F21, PPC::F22, PPC::F23,
+ PPC::F24, PPC::F25, PPC::F26, PPC::F27,
+ PPC::F28, PPC::F29, PPC::F30, PPC::F31,
+
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
+static const unsigned GPRegs[] = {
+ PPC::R0, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned GP0Regs[] = {
+ PPC::ZERO, PPC::R1, PPC::R2, PPC::R3,
+ PPC::R4, PPC::R5, PPC::R6, PPC::R7,
+ PPC::R8, PPC::R9, PPC::R10, PPC::R11,
+ PPC::R12, PPC::R13, PPC::R14, PPC::R15,
+ PPC::R16, PPC::R17, PPC::R18, PPC::R19,
+ PPC::R20, PPC::R21, PPC::R22, PPC::R23,
+ PPC::R24, PPC::R25, PPC::R26, PPC::R27,
+ PPC::R28, PPC::R29, PPC::R30, PPC::R31
+};
+
+static const unsigned G8Regs[] = {
+ PPC::X0, PPC::X1, PPC::X2, PPC::X3,
+ PPC::X4, PPC::X5, PPC::X6, PPC::X7,
+ PPC::X8, PPC::X9, PPC::X10, PPC::X11,
+ PPC::X12, PPC::X13, PPC::X14, PPC::X15,
+ PPC::X16, PPC::X17, PPC::X18, PPC::X19,
+ PPC::X20, PPC::X21, PPC::X22, PPC::X23,
+ PPC::X24, PPC::X25, PPC::X26, PPC::X27,
+ PPC::X28, PPC::X29, PPC::X30, PPC::X31
+};
+
+template <std::size_t N>
+static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
+ const unsigned (&Regs)[N]) {
+ assert(RegNo < N && "Invalid register number");
+ Inst.addOperand(MCOperand::CreateReg(Regs[RegNo]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeCRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRRegs);
+}
+
+static DecodeStatus DecodeCRBITRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, CRBITRegs);
+}
+
+static DecodeStatus DecodeF4RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, FRegs);
+}
+
+static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VRegs);
+}
+
+static DecodeStatus DecodeVSRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSRegs);
+}
+
+static DecodeStatus DecodeVSFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VSFRegs);
+}
+
+static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GPRegs);
+}
+
+static DecodeStatus DecodeGPRC_NOR0RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, GP0Regs);
+}
+
+static DecodeStatus DecodeG8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, G8Regs);
+}
+
+#define DecodePointerLikeRegClass0 DecodeGPRCRegisterClass
+#define DecodePointerLikeRegClass1 DecodeGPRC_NOR0RegisterClass
+
+template<unsigned N>
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ return MCDisassembler::Success;
+}
+
+template<unsigned N>
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ assert(isUInt<N>(Imm) && "Invalid immediate");
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memri field (imm, reg), which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+
+ uint64_t Base = Imm >> 16;
+ uint64_t Disp = Imm & 0xFFFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ switch (Inst.getOpcode()) {
+ default: break;
+ case PPC::LBZU:
+ case PPC::LHAU:
+ case PPC::LHZU:
+ case PPC::LWZU:
+ case PPC::LFSU:
+ case PPC::LFDU:
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ break;
+ case PPC::STBU:
+ case PPC::STHU:
+ case PPC::STWU:
+ case PPC::STFSU:
+ case PPC::STFDU:
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+ break;
+ }
+
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp)));
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // Decode the memrix field (imm, reg), which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+
+ uint64_t Base = Imm >> 14;
+ uint64_t Disp = Imm & 0x3FFF;
+
+ assert(Base < 32 && "Invalid base register");
+
+ if (Inst.getOpcode() == PPC::LDU)
+ // Add the tied output operand.
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ else if (Inst.getOpcode() == PPC::STDU)
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(GP0Regs[Base]));
+
+ Inst.addOperand(MCOperand::CreateImm(SignExtend64<16>(Disp << 2)));
+ Inst.addOperand(MCOperand::CreateReg(GP0Regs[Base]));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
+ int64_t Address, const void *Decoder) {
+ // The cr bit encoding is 0x80 >> cr_reg_num.
+
+ unsigned Zeros = countTrailingZeros(Imm);
+ assert(Zeros < 8 && "Invalid CR bit value");
+
+ Inst.addOperand(MCOperand::CreateReg(CRRegs[7 - Zeros]));
+ return MCDisassembler::Success;
+}
+
+#include "PPCGenDisassemblerTables.inc"
+
+DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
+ // Get the four bytes of the instruction.
+ uint8_t Bytes[4];
+ Size = 4;
+ if (Region.readBytes(Address, Size, Bytes) == -1) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ // The instruction is big-endian encoded.
+ uint32_t Inst = (Bytes[0] << 24) |
+ (Bytes[1] << 16) |
+ (Bytes[2] << 8) |
+ (Bytes[3] << 0);
+
+ return decodeInstruction(DecoderTable32, MI, Inst, Address, this, STI);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
new file mode 100644
index 000000000000..771b6f5ec487
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -0,0 +1,365 @@
+//===-- PPCInstPrinter.cpp - Convert PPC MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstPrinter.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asm-printer"
+
+// FIXME: Once the integrated assembler supports full register names, tie this
+// to the verbose-asm setting.
+static cl::opt<bool>
+FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
+ cl::desc("Use full register names when printing assembly"));
+
+#include "PPCGenAsmWriter.inc"
+
+void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << getRegisterName(RegNo);
+}
+
+void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ // Check for slwi/srwi mnemonics.
+ if (MI->getOpcode() == PPC::RLWINM) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char MB = MI->getOperand(3).getImm();
+ unsigned char ME = MI->getOperand(4).getImm();
+ bool useSubstituteMnemonic = false;
+ if (SH <= 31 && MB == 0 && ME == (31-SH)) {
+ O << "\tslwi "; useSubstituteMnemonic = true;
+ }
+ if (SH <= 31 && MB == (32-SH) && ME == 31) {
+ O << "\tsrwi "; useSubstituteMnemonic = true;
+ SH = 32-SH;
+ }
+ if (useSubstituteMnemonic) {
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ if ((MI->getOpcode() == PPC::OR || MI->getOpcode() == PPC::OR8) &&
+ MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ O << "\tmr ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ printAnnotation(O, Annot);
+ return;
+ }
+
+ if (MI->getOpcode() == PPC::RLDICR) {
+ unsigned char SH = MI->getOperand(2).getImm();
+ unsigned char ME = MI->getOperand(3).getImm();
+ // rldicr RA, RS, SH, 63-SH == sldi RA, RS, SH
+ if (63-SH == ME) {
+ O << "\tsldi ";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ O << ", " << (unsigned int)SH;
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
+
+ // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is
+ // used when converting a 32-bit float to a 64-bit float as part of
+ // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()),
+ // as otherwise we have problems with incorrect register classes
+ // in machine instruction verification. For now, just avoid trying
+ // to print it as such an instruction has no effect (a 32-bit float
+ // in a register is already in 64-bit form, just with lower
+ // precision). FIXME: Is there a better solution?
+ if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
+
+void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O,
+ const char *Modifier) {
+ unsigned Code = MI->getOperand(OpNo).getImm();
+
+ if (StringRef(Modifier) == "cc") {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LT_PLUS:
+ case PPC::PRED_LT:
+ O << "lt";
+ return;
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_LE_PLUS:
+ case PPC::PRED_LE:
+ O << "le";
+ return;
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_EQ_PLUS:
+ case PPC::PRED_EQ:
+ O << "eq";
+ return;
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GE_PLUS:
+ case PPC::PRED_GE:
+ O << "ge";
+ return;
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_GT_PLUS:
+ case PPC::PRED_GT:
+ O << "gt";
+ return;
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_NE_PLUS:
+ case PPC::PRED_NE:
+ O << "ne";
+ return;
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_UN_PLUS:
+ case PPC::PRED_UN:
+ O << "un";
+ return;
+ case PPC::PRED_NU_MINUS:
+ case PPC::PRED_NU_PLUS:
+ case PPC::PRED_NU:
+ O << "nu";
+ return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
+ }
+ llvm_unreachable("Invalid predicate code");
+ }
+
+ if (StringRef(Modifier) == "pm") {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_LT:
+ case PPC::PRED_LE:
+ case PPC::PRED_EQ:
+ case PPC::PRED_GE:
+ case PPC::PRED_GT:
+ case PPC::PRED_NE:
+ case PPC::PRED_UN:
+ case PPC::PRED_NU:
+ return;
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_NU_MINUS:
+ O << "-";
+ return;
+ case PPC::PRED_LT_PLUS:
+ case PPC::PRED_LE_PLUS:
+ case PPC::PRED_EQ_PLUS:
+ case PPC::PRED_GE_PLUS:
+ case PPC::PRED_GT_PLUS:
+ case PPC::PRED_NE_PLUS:
+ case PPC::PRED_UN_PLUS:
+ case PPC::PRED_NU_PLUS:
+ O << "+";
+ return;
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
+ }
+ llvm_unreachable("Invalid predicate code");
+ }
+
+ assert(StringRef(Modifier) == "reg" &&
+ "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!");
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 3 && "Invalid u2imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ int Value = MI->getOperand(OpNo).getImm();
+ Value = SignExtend32<5>(Value);
+ O << (int)Value;
+}
+
+void PPCInstPrinter::printU5ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 31 && "Invalid u5imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned int Value = MI->getOperand(OpNo).getImm();
+ assert(Value <= 63 && "Invalid u6imm argument!");
+ O << (unsigned int)Value;
+}
+
+void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)MI->getOperand(OpNo).getImm();
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm())
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ else
+ printOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ // Branches can take an immediate operand. This is used by the branch
+ // selection pass to print .+8, an eight byte displacement from the PC.
+ O << ".+";
+ printAbsBranchOperand(MI, OpNo, O);
+}
+
+void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
+ O << (int)MI->getOperand(OpNo).getImm()*4;
+}
+
+
+void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned CCReg = MI->getOperand(OpNo).getReg();
+ unsigned RegNo;
+ switch (CCReg) {
+ default: llvm_unreachable("Unknown CR register");
+ case PPC::CR0: RegNo = 0; break;
+ case PPC::CR1: RegNo = 1; break;
+ case PPC::CR2: RegNo = 2; break;
+ case PPC::CR3: RegNo = 3; break;
+ case PPC::CR4: RegNo = 4; break;
+ case PPC::CR5: RegNo = 5; break;
+ case PPC::CR6: RegNo = 6; break;
+ case PPC::CR7: RegNo = 7; break;
+ }
+ O << (0x80 >> RegNo);
+}
+
+void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printS16ImmOperand(MI, OpNo, O);
+ O << '(';
+ if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
+
+void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ if (MI->getOperand(OpNo).getReg() == PPC::R0)
+ O << "0";
+ else
+ printOperand(MI, OpNo, O);
+ O << ", ";
+ printOperand(MI, OpNo+1, O);
+}
+
+void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ // On PPC64, VariantKind is VK_None, but on PPC32, it's VK_PLT, and it must
+ // come at the _end_ of the expression.
+ const MCOperand &Op = MI->getOperand(OpNo);
+ const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*Op.getExpr());
+ O << refExp.getSymbol().getName();
+ O << '(';
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+ if (refExp.getKind() != MCSymbolRefExpr::VK_None)
+ O << '@' << MCSymbolRefExpr::getVariantKindName(refExp.getKind());
+}
+
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ if (FullRegNames)
+ return RegName;
+
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ const char *RegName = getRegisterName(Op.getReg());
+ // The linux and AIX assembler does not take register prefixes.
+ if (!isDarwinSyntax())
+ RegName = stripRegisterPrefix(RegName);
+
+ O << RegName;
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
new file mode 100644
index 000000000000..211a62813e7a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -0,0 +1,63 @@
+//===- PPCInstPrinter.h - Convert PPC MCInst to assembly syntax -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an PPC MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCINSTPRINTER_H
+#define PPCINSTPRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+
+class MCOperand;
+
+class PPCInstPrinter : public MCInstPrinter {
+ bool IsDarwin;
+public:
+ PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, bool isDarwin)
+ : MCInstPrinter(MAI, MII, MRI), IsDarwin(isDarwin) {}
+
+ bool isDarwinSyntax() const {
+ return IsDarwin;
+ }
+
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
+
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPredicateOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier = nullptr);
+
+ void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
new file mode 100644
index 000000000000..c54d5e75bdfd
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -0,0 +1,245 @@
+//===-- PPCAsmBackend.cpp - PPC Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ case PPC::fixup_ppc_nofixup:
+ return Value;
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
+ return Value & 0xfffc;
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ return Value & 0x3fffffc;
+ case PPC::fixup_ppc_half16:
+ return Value & 0xffff;
+ case PPC::fixup_ppc_half16ds:
+ return Value & 0xfffc;
+ }
+}
+
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_half16ds:
+ return 2;
+ case FK_Data_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ return 4;
+ case FK_Data_8:
+ return 8;
+ case PPC::fixup_ppc_nofixup:
+ return 0;
+ }
+}
+
+namespace {
+
+class PPCAsmBackend : public MCAsmBackend {
+ const Target &TheTarget;
+ bool IsLittleEndian;
+public:
+ PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T),
+ IsLittleEndian(isLittle) {}
+
+ unsigned getNumFixupKinds() const override {
+ return PPC::NumTargetFixupKinds;
+ }
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
+ const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24abs", 6, 24, 0 },
+ { "fixup_ppc_brcond14abs", 16, 14, 0 },
+ { "fixup_ppc_half16", 0, 16, 0 },
+ { "fixup_ppc_half16ds", 0, 14, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
+ };
+ const static MCFixupKindInfo InfosLE[PPC::NumTargetFixupKinds] = {
+ // name offset bits flags
+ { "fixup_ppc_br24", 2, 24, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_brcond14", 2, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_ppc_br24abs", 2, 24, 0 },
+ { "fixup_ppc_brcond14abs", 2, 14, 0 },
+ { "fixup_ppc_half16", 0, 16, 0 },
+ { "fixup_ppc_half16ds", 2, 14, 0 },
+ { "fixup_ppc_nofixup", 0, 0, 0 }
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+ return (IsLittleEndian? InfosLE : InfosBE)[Kind - FirstTargetFixupKind];
+ }
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override {
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+ if (!Value) return; // Doesn't change encoding.
+
+ unsigned Offset = Fixup.getOffset();
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+
+ // For each byte of the fragment that the fixup touches, mask in the bits
+ // from the fixup value. The Value has been "split up" into the appropriate
+ // bitfields above.
+ for (unsigned i = 0; i != NumBytes; ++i) {
+ unsigned Idx = IsLittleEndian ? i : (NumBytes - 1 - i);
+ Data[Offset + i] |= uint8_t((Value >> (Idx * 8)) & 0xff);
+ }
+ }
+
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override {
+ switch ((PPC::Fixups)Fixup.getKind()) {
+ default: break;
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ // If the target symbol has a local entry point we must not attempt
+ // to resolve the fixup directly. Emit a relocation and leave
+ // resolution of the final target address to the linker.
+ if (const MCSymbolRefExpr *A = Target.getSymA()) {
+ const MCSymbolData &Data = Asm.getSymbolData(A->getSymbol());
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
+ IsResolved = false;
+ }
+ break;
+ }
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override {
+ // FIXME.
+ return false;
+ }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
+ // FIXME.
+ llvm_unreachable("relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0x60000000);
+
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0); break;
+ }
+
+ return true;
+ }
+
+ unsigned getPointerSize() const {
+ StringRef Name = TheTarget.getName();
+ if (Name == "ppc64" || Name == "ppc64le") return 8;
+ assert(Name == "ppc32" && "Unknown target name!");
+ return 4;
+ }
+
+ bool isLittleEndian() const {
+ return IsLittleEndian;
+ }
+};
+} // end anonymous namespace
+
+
+// FIXME: This should be in a separate file.
+namespace {
+ class DarwinPPCAsmBackend : public PPCAsmBackend {
+ public:
+ DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ bool is64 = getPointerSize() == 8;
+ return createPPCMachObjectWriter(
+ OS,
+ /*Is64Bit=*/is64,
+ (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+ MachO::CPU_SUBTYPE_POWERPC_ALL);
+ }
+ };
+
+ class ELFPPCAsmBackend : public PPCAsmBackend {
+ uint8_t OSABI;
+ public:
+ ELFPPCAsmBackend(const Target &T, bool IsLittleEndian, uint8_t OSABI) :
+ PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
+
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ bool is64 = getPointerSize() == 8;
+ return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
+ }
+ };
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
+ if (Triple(TT).isOSDarwin())
+ return new DarwinPPCAsmBackend(T);
+
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ bool IsLittleEndian = Triple(TT).getArch() == Triple::ppc64le;
+ return new ELFPPCAsmBackend(T, IsLittleEndian, OSABI);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
new file mode 100644
index 000000000000..ca813176bd5a
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -0,0 +1,432 @@
+//===-- PPCELFObjectWriter.cpp - PPC ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+ class PPCELFObjectWriter : public MCELFObjectTargetWriter {
+ public:
+ PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI);
+
+ virtual ~PPCELFObjectWriter();
+ protected:
+ virtual unsigned getRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
+ unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsPCRel) const override;
+
+ bool needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const override;
+ };
+}
+
+PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI,
+ Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
+ /*HasRelocationAddend*/ true) {}
+
+PPCELFObjectWriter::~PPCELFObjectWriter() {
+}
+
+static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
+ const MCFixup &Fixup) {
+ const MCExpr *Expr = Fixup.getValue();
+
+ if (Expr->getKind() != MCExpr::Target)
+ return Target.getAccessVariant();
+
+ switch (cast<PPCMCExpr>(Expr)->getKind()) {
+ case PPCMCExpr::VK_PPC_None:
+ return MCSymbolRefExpr::VK_None;
+ case PPCMCExpr::VK_PPC_LO:
+ return MCSymbolRefExpr::VK_PPC_LO;
+ case PPCMCExpr::VK_PPC_HI:
+ return MCSymbolRefExpr::VK_PPC_HI;
+ case PPCMCExpr::VK_PPC_HA:
+ return MCSymbolRefExpr::VK_PPC_HA;
+ case PPCMCExpr::VK_PPC_HIGHERA:
+ return MCSymbolRefExpr::VK_PPC_HIGHERA;
+ case PPCMCExpr::VK_PPC_HIGHER:
+ return MCSymbolRefExpr::VK_PPC_HIGHER;
+ case PPCMCExpr::VK_PPC_HIGHEST:
+ return MCSymbolRefExpr::VK_PPC_HIGHEST;
+ case PPCMCExpr::VK_PPC_HIGHESTA:
+ return MCSymbolRefExpr::VK_PPC_HIGHESTA;
+ }
+ llvm_unreachable("unknown PPCMCExpr kind");
+}
+
+unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const
+{
+ MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
+
+ // determine the type of the relocation
+ unsigned Type;
+ if (IsPCRel) {
+ switch ((unsigned)Fixup.getKind()) {
+ default:
+ llvm_unreachable("Unimplemented");
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL24;
+ break;
+ case MCSymbolRefExpr::VK_PLT:
+ Type = ELF::R_PPC_PLTREL24;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
+ Type = ELF::R_PPC_REL14;
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC_REL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = ELF::R_PPC_REL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = ELF::R_PPC_REL16_HA;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ case FK_PCRel_4:
+ Type = ELF::R_PPC_REL32;
+ break;
+ case FK_Data_8:
+ case FK_PCRel_8:
+ Type = ELF::R_PPC64_REL64;
+ break;
+ }
+ } else {
+ switch ((unsigned)Fixup.getKind()) {
+ default: llvm_unreachable("invalid fixup kind!");
+ case PPC::fixup_ppc_br24abs:
+ Type = ELF::R_PPC_ADDR24;
+ break;
+ case PPC::fixup_ppc_brcond14abs:
+ Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?_
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = ELF::R_PPC_ADDR16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = ELF::R_PPC_ADDR16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ Type = ELF::R_PPC64_ADDR16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ Type = ELF::R_PPC64_ADDR16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ Type = ELF::R_PPC64_ADDR16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ Type = ELF::R_PPC64_ADDR16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_PPC_GOT16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_LO:
+ Type = ELF::R_PPC_GOT16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_HI:
+ Type = ELF::R_PPC_GOT16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_HA:
+ Type = ELF::R_PPC_GOT16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC:
+ Type = ELF::R_PPC64_TOC16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_LO:
+ Type = ELF::R_PPC64_TOC16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_HI:
+ Type = ELF::R_PPC64_TOC16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_HA:
+ Type = ELF::R_PPC64_TOC16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC_TPREL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_LO:
+ Type = ELF::R_PPC_TPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HI:
+ Type = ELF::R_PPC_TPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HA:
+ Type = ELF::R_PPC_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHER:
+ Type = ELF::R_PPC64_TPREL16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHERA:
+ Type = ELF::R_PPC64_TPREL16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHEST:
+ Type = ELF::R_PPC64_TPREL16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHESTA:
+ Type = ELF::R_PPC64_TPREL16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HI:
+ Type = ELF::R_PPC64_DTPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHER:
+ Type = ELF::R_PPC64_DTPREL16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHERA:
+ Type = ELF::R_PPC64_DTPREL16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHEST:
+ Type = ELF::R_PPC64_DTPREL16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHESTA:
+ Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSGD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSGD16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
+ Type = ELF::R_PPC64_GOT_TLSGD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_GOT_TLSLD16;
+ else
+ Type = ELF::R_PPC_GOT_TLSLD16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
+ Type = ELF::R_PPC64_GOT_TLSLD16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL:
+ /* We don't have R_PPC64_GOT_TPREL16, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_TPREL16_DS. */
+ Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO:
+ /* We don't have R_PPC64_GOT_TPREL16_LO, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_TPREL16_LO_DS. */
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI:
+ Type = ELF::R_PPC64_GOT_TPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL:
+ /* We don't have R_PPC64_GOT_DTPREL16, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_DS. */
+ Type = ELF::R_PPC64_GOT_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO:
+ /* We don't have R_PPC64_GOT_DTPREL16_LO, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_LO_DS. */
+ Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI:
+ Type = ELF::R_PPC64_GOT_DTPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA:
+ Type = ELF::R_PPC64_GOT_DTPREL16_HA;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_half16ds:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC64_ADDR16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_PPC64_GOT16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_LO:
+ Type = ELF::R_PPC64_GOT16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC:
+ Type = ELF::R_PPC64_TOC16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_LO:
+ Type = ELF::R_PPC64_TOC16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC64_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_LO:
+ Type = ELF::R_PPC64_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL:
+ Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO:
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL:
+ Type = ELF::R_PPC64_GOT_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO:
+ Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS;
+ break;
+ }
+ break;
+ case PPC::fixup_ppc_nofixup:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TLSGD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSGD;
+ else
+ Type = ELF::R_PPC_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLSLD:
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLSLD;
+ else
+ Type = ELF::R_PPC_TLSLD;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TLS:
+ if (is64Bit())
+ Type = ELF::R_PPC64_TLS;
+ else
+ Type = ELF::R_PPC_TLS;
+ break;
+ }
+ break;
+ case FK_Data_8:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_PPC_TOCBASE:
+ Type = ELF::R_PPC64_TOC;
+ break;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC64_ADDR64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPMOD:
+ Type = ELF::R_PPC64_DTPMOD64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC64_TPREL64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL64;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ Type = ELF::R_PPC_ADDR32;
+ break;
+ case FK_Data_2:
+ Type = ELF::R_PPC_ADDR16;
+ break;
+ }
+ }
+ return Type;
+}
+
+unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ return getRelocTypeInner(Target, Fixup, IsPCRel);
+}
+
+bool PPCELFObjectWriter::needsRelocateWithSymbol(const MCSymbolData &SD,
+ unsigned Type) const {
+ switch (Type) {
+ default:
+ return false;
+
+ case ELF::R_PPC_REL24:
+ // If the target symbol has a local entry point, we must keep the
+ // target symbol to preserve that information for the linker.
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(SD) << 2;
+ return (Other & ELF::STO_PPC64_LOCAL_MASK) != 0;
+ }
+}
+
+MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ bool IsLittleEndian,
+ uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new PPCELFObjectWriter(Is64Bit, OSABI);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
new file mode 100644
index 000000000000..68de8c1f27a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -0,0 +1,56 @@
+//===-- PPCFixupKinds.h - PPC Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PPC_PPCFIXUPKINDS_H
+#define LLVM_PPC_PPCFIXUPKINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+#undef PPC
+
+namespace llvm {
+namespace PPC {
+enum Fixups {
+ // fixup_ppc_br24 - 24-bit PC relative relocation for direct branches like 'b'
+ // and 'bl'.
+ fixup_ppc_br24 = FirstTargetFixupKind,
+
+ /// fixup_ppc_brcond14 - 14-bit PC relative relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14,
+
+ /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches
+ /// like 'ba' and 'bla'.
+ fixup_ppc_br24abs,
+
+ /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14abs,
+
+ /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo)
+ /// or ha16(_foo) for instrs like 'li' or 'addis'.
+ fixup_ppc_half16,
+
+ /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// implied 2 zero bits for instrs like 'std'.
+ fixup_ppc_half16ds,
+
+ /// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
+ /// to __tls_get_addr for the TLS general and local dynamic models,
+ /// or inserts the thread-pointer register number.
+ fixup_ppc_nofixup,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
new file mode 100644
index 000000000000..b95a2ac13e04
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -0,0 +1,82 @@
+//===-- PPCMCAsmInfo.cpp - PPC asm properties -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MCAsmInfoDarwin properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
+using namespace llvm;
+
+void PPCMCAsmInfoDarwin::anchor() { }
+
+PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = false;
+
+ CommentString = ";";
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ if (!is64Bit)
+ Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode.
+
+ AssemblerDialect = 1; // New-Style mnemonics.
+ SupportsDebugInformation= true; // Debug information.
+
+ // The installed assembler for OSX < 10.6 lacks some directives.
+ // FIXME: this should really be a check on the assembler characteristics
+ // rather than OS version
+ if (T.isMacOSX() && T.isMacOSXVersionLT(10, 6))
+ HasWeakDefCanBeHiddenDirective = false;
+
+ UseIntegratedAssembler = true;
+}
+
+void PPCLinuxMCAsmInfo::anchor() { }
+
+PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
+ if (is64Bit) {
+ PointerSize = CalleeSaveStackSlotSize = 8;
+ }
+ IsLittleEndian = T.getArch() == Triple::ppc64le;
+
+ // ".comm align is in bytes but .align is pow-2."
+ AlignmentIsInBytes = false;
+
+ CommentString = "#";
+
+ // Uses '.section' before '.bss' directive
+ UsesELFSectionDirectiveForBSS = true;
+
+ // Debug Information
+ SupportsDebugInformation = true;
+
+ DollarIsPC = true;
+
+ // Set up DWARF directives
+ HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ MinInstAlignment = 4;
+
+ // Exceptions handling
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
+ AssemblerDialect = 1; // New-Style mnemonics.
+
+ if (T.getOS() == llvm::Triple::FreeBSD ||
+ (T.getOS() == llvm::Triple::NetBSD && !is64Bit) ||
+ (T.getOS() == llvm::Triple::OpenBSD && !is64Bit))
+ UseIntegratedAssembler = true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
new file mode 100644
index 000000000000..754330b2c60f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -0,0 +1,37 @@
+//===-- PPCMCAsmInfo.h - PPC asm properties --------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCAsmInfoDarwin class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETASMINFO_H
+#define PPCTARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+ class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
+ void anchor() override;
+ public:
+ explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
+ };
+
+ class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+ public:
+ explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
new file mode 100644
index 000000000000..435a93f78c1d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -0,0 +1,322 @@
+//===-- PPCMCCodeEmitter.cpp - Convert PPC code to machine code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class PPCMCCodeEmitter : public MCCodeEmitter {
+ PPCMCCodeEmitter(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+ void operator=(const PPCMCCodeEmitter &) LLVM_DELETED_FUNCTION;
+
+ const MCInstrInfo &MCII;
+ const MCContext &CTX;
+ bool IsLittleEndian;
+
+public:
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx, bool isLittle)
+ : MCII(mcii), CTX(ctx), IsLittleEndian(isLittle) {
+ }
+
+ ~PPCMCCodeEmitter() {}
+
+ unsigned getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override {
+ // For fast-isel, a float COPY_TO_REGCLASS can survive this long.
+ // It's just a nop to keep the register classes happy, so don't
+ // generate anything.
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MCII.get(Opcode);
+ if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
+ uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+
+ // Output the constant in big/little endian byte order.
+ unsigned Size = Desc.getSize();
+ switch (Size) {
+ case 4:
+ if (IsLittleEndian) {
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
+ }
+ break;
+ case 8:
+ // If we emit a pair of instructions, the first one is
+ // always in the top 32 bits, even on little-endian.
+ if (IsLittleEndian) {
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 24);
+ } else {
+ OS << (char)(Bits >> 56);
+ OS << (char)(Bits >> 48);
+ OS << (char)(Bits >> 40);
+ OS << (char)(Bits >> 32);
+ OS << (char)(Bits >> 24);
+ OS << (char)(Bits >> 16);
+ OS << (char)(Bits >> 8);
+ OS << (char)(Bits);
+ }
+ break;
+ default:
+ llvm_unreachable ("Invalid instruction size");
+ }
+
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+ }
+
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ Triple TT(STI.getTargetTriple());
+ bool IsLittleEndian = TT.getArch() == Triple::ppc64le;
+ return new PPCMCCodeEmitter(MCII, Ctx, IsLittleEndian);
+}
+
+unsigned PPCMCCodeEmitter::
+getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::
+getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_br24abs));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::
+getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the branch target.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_brcond14abs));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the immediate field.
+ Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ Fixups.push_back(MCFixup::Create(IsLittleEndian? 0 : 2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16ds));
+ return RegBits;
+}
+
+
+unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups, STI);
+
+ // Add a fixup for the TLS register, which simply provides a relocation
+ // hint to the linker that this statement is part of a relocation sequence.
+ // Return the thread-pointer register's encoding.
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ Triple TT(STI.getTargetTriple());
+ bool isPPC64 = TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le;
+ return CTX.getRegisterInfo()->getEncodingValue(isPPC64 ? PPC::X13 : PPC::R2);
+}
+
+unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // For special TLS calls, we need two fixups; one for the branch target
+ // (__tls_get_addr), which we create via getDirectBrEncoding as usual,
+ // and one for the TLSGD or TLSLD symbol, which is emitted here.
+ const MCOperand &MO = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ return getDirectBrEncoding(MI, OpNo, Fixups, STI);
+}
+
+unsigned PPCMCCodeEmitter::
+get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+}
+
+
+unsigned PPCMCCodeEmitter::
+getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg()) {
+ // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+
+#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
new file mode 100644
index 000000000000..3ac0aca6b78c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -0,0 +1,133 @@
+//===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCExpr.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppcmcexpr"
+
+const PPCMCExpr*
+PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin);
+}
+
+void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
+ if (isDarwinSyntax()) {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO: OS << "lo16"; break;
+ case VK_PPC_HI: OS << "hi16"; break;
+ case VK_PPC_HA: OS << "ha16"; break;
+ }
+
+ OS << '(';
+ getSubExpr()->print(OS);
+ OS << ')';
+ } else {
+ getSubExpr()->print(OS);
+
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO: OS << "@l"; break;
+ case VK_PPC_HI: OS << "@h"; break;
+ case VK_PPC_HA: OS << "@ha"; break;
+ case VK_PPC_HIGHER: OS << "@higher"; break;
+ case VK_PPC_HIGHERA: OS << "@highera"; break;
+ case VK_PPC_HIGHEST: OS << "@highest"; break;
+ case VK_PPC_HIGHESTA: OS << "@highesta"; break;
+ }
+ }
+}
+
+bool
+PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ MCValue Value;
+
+ if (!getSubExpr()->EvaluateAsRelocatable(Value, Layout))
+ return false;
+
+ if (Value.isAbsolute()) {
+ int64_t Result = Value.getConstant();
+ switch (Kind) {
+ default:
+ llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO:
+ Result = Result & 0xffff;
+ break;
+ case VK_PPC_HI:
+ Result = (Result >> 16) & 0xffff;
+ break;
+ case VK_PPC_HA:
+ Result = ((Result + 0x8000) >> 16) & 0xffff;
+ break;
+ case VK_PPC_HIGHER:
+ Result = (Result >> 32) & 0xffff;
+ break;
+ case VK_PPC_HIGHERA:
+ Result = ((Result + 0x8000) >> 32) & 0xffff;
+ break;
+ case VK_PPC_HIGHEST:
+ Result = (Result >> 48) & 0xffff;
+ break;
+ case VK_PPC_HIGHESTA:
+ Result = ((Result + 0x8000) >> 48) & 0xffff;
+ break;
+ }
+ Res = MCValue::get(Result);
+ } else {
+ if (!Layout)
+ return false;
+
+ MCContext &Context = Layout->getAssembler().getContext();
+ const MCSymbolRefExpr *Sym = Value.getSymA();
+ MCSymbolRefExpr::VariantKind Modifier = Sym->getKind();
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ return false;
+ switch (Kind) {
+ default:
+ llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO:
+ Modifier = MCSymbolRefExpr::VK_PPC_LO;
+ break;
+ case VK_PPC_HI:
+ Modifier = MCSymbolRefExpr::VK_PPC_HI;
+ break;
+ case VK_PPC_HA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HA;
+ break;
+ case VK_PPC_HIGHERA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHERA;
+ break;
+ case VK_PPC_HIGHER:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHER;
+ break;
+ case VK_PPC_HIGHEST:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHEST;
+ break;
+ case VK_PPC_HIGHESTA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHESTA;
+ break;
+ }
+ Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context);
+ Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant());
+ }
+
+ return true;
+}
+
+void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
new file mode 100644
index 000000000000..bca408507e72
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -0,0 +1,96 @@
+//===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCEXPR_H
+#define PPCMCEXPR_H
+
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+namespace llvm {
+
+class PPCMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_PPC_None,
+ VK_PPC_LO,
+ VK_PPC_HI,
+ VK_PPC_HA,
+ VK_PPC_HIGHER,
+ VK_PPC_HIGHERA,
+ VK_PPC_HIGHEST,
+ VK_PPC_HIGHESTA
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+ bool IsDarwin;
+
+ explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
+ bool _IsDarwin)
+ : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx);
+
+ static const PPCMCExpr *CreateLo(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return Create(VK_PPC_LO, Expr, isDarwin, Ctx);
+ }
+
+ static const PPCMCExpr *CreateHi(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return Create(VK_PPC_HI, Expr, isDarwin, Ctx);
+ }
+
+ static const PPCMCExpr *CreateHa(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return Create(VK_PPC_HA, Expr, isDarwin, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+ /// getOpcode - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// isDarwinSyntax - True if expression is to be printed using Darwin syntax.
+ bool isDarwinSyntax() const { return IsDarwin; }
+
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const override;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override;
+ const MCSection *FindAssociatedSection() const override {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ // There are no TLS PPCMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+};
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 000000000000..4c6780ff75a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,307 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "PPCMCAsmInfo.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+// Pin the vtable to this file.
+PPCTargetStreamer::~PPCTargetStreamer() {}
+PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPPCMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
+ unsigned Flavour = isPPC64 ? 0 : 1;
+ unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
+
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitPPCMCRegisterInfo(X, RA, Flavour, Flavour);
+ return X;
+}
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPPCMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
+
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin())
+ MAI = new PPCMCAsmInfoDarwin(isPPC64, TheTriple);
+ else
+ MAI = new PPCLinuxMCAsmInfo(isPPC64, TheTriple);
+
+ // Initial state of the frame pointer is R1.
+ unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
+ MAI->addInitialFrameState(Inst);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+
+ if (RM == Reloc::Default) {
+ Triple T(TT);
+ if (T.isOSDarwin())
+ RM = Reloc::DynamicNoPIC;
+ else
+ RM = Reloc::Static;
+ }
+ if (CM == CodeModel::Default) {
+ Triple T(TT);
+ if (!T.isOSDarwin() &&
+ (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le))
+ CM = CodeModel::Medium;
+ }
+ X->InitMCCodeGenInfo(RM, CM, OL);
+ return X;
+}
+
+namespace {
+class PPCTargetAsmStreamer : public PPCTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
+ : PPCTargetStreamer(S), OS(OS) {}
+ void emitTCEntry(const MCSymbol &S) override {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ OS << '\n';
+ }
+ void emitMachine(StringRef CPU) override {
+ OS << "\t.machine " << CPU << '\n';
+ }
+ virtual void emitAbiVersion(int AbiVersion) override {
+ OS << "\t.abiversion " << AbiVersion << '\n';
+ }
+ virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) {
+ OS << "\t.localentry\t" << *S << ", " << *LocalOffset << '\n';
+ }
+};
+
+class PPCTargetELFStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
+ virtual void emitTCEntry(const MCSymbol &S) override {
+ // Creates a R_PPC64_TOC relocation
+ Streamer.EmitSymbolValue(&S, 8);
+ }
+ void emitMachine(StringRef CPU) override {
+ // FIXME: Is there anything to do in here or does this directive only
+ // limit the parser?
+ }
+ virtual void emitAbiVersion(int AbiVersion) override {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ Flags &= ~ELF::EF_PPC64_ABI;
+ Flags |= (AbiVersion & ELF::EF_PPC64_ABI);
+ MCA.setELFHeaderEFlags(Flags);
+ }
+ virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCSymbolData &Data = getStreamer().getOrCreateSymbolData(S);
+
+ int64_t Res;
+ if (!LocalOffset->EvaluateAsAbsolute(Res, MCA))
+ report_fatal_error(".localentry expression must be absolute.");
+
+ unsigned Encoded = ELF::encodePPC64LocalEntryOffset(Res);
+ if (Res != ELF::decodePPC64LocalEntryOffset(Encoded))
+ report_fatal_error(".localentry expression cannot be encoded.");
+
+ // The "other" values are stored in the last 6 bits of the second byte.
+ // The traditional defines for STO values assume the full byte and thus
+ // the shift to pack it.
+ unsigned Other = MCELF::getOther(Data) << 2;
+ Other &= ~ELF::STO_PPC64_LOCAL_MASK;
+ Other |= Encoded;
+ MCELF::setOther(Data, Other >> 2);
+
+ // For GAS compatibility, unless we already saw a .abiversion directive,
+ // set e_flags to indicate ELFv2 ABI.
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ if ((Flags & ELF::EF_PPC64_ABI) == 0)
+ MCA.setELFHeaderEFlags(Flags | 2);
+ }
+};
+
+class PPCTargetMachOStreamer : public PPCTargetStreamer {
+public:
+ PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
+ void emitTCEntry(const MCSymbol &S) override {
+ llvm_unreachable("Unknown pseudo-op: .tc");
+ }
+ void emitMachine(StringRef CPU) override {
+ // FIXME: We should update the CPUType, CPUSubType in the Object file if
+ // the new values are different from the defaults.
+ }
+ virtual void emitAbiVersion(int AbiVersion) override {
+ llvm_unreachable("Unknown pseudo-op: .abiversion");
+ }
+ virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) {
+ llvm_unreachable("Unknown pseudo-op: .localentry");
+ }
+};
+}
+
+// This is duplicated code. Refactor this.
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &MAB,
+ raw_ostream &OS,
+ MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI,
+ bool RelaxAll,
+ bool NoExecStack) {
+ if (Triple(TT).isOSDarwin()) {
+ MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+ new PPCTargetMachOStreamer(*S);
+ return S;
+ }
+
+ MCStreamer *S =
+ createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ new PPCTargetELFStreamer(*S);
+ return S;
+}
+
+static MCStreamer *
+createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useDwarfDirectory,
+ MCInstPrinter *InstPrint, MCCodeEmitter *CE,
+ MCAsmBackend *TAB, bool ShowInst) {
+
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
+ new PPCTargetAsmStreamer(*S, OS);
+ return S;
+}
+
+static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI) {
+ bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
+ return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
+}
+
+extern "C" void LLVMInitializePowerPCTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
+ RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
+ createPPCMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
+ createPPCMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
+ createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
+ createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
+ createPPCMCSubtargetInfo);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
+ createPPCMCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
+ createPPCMCInstPrinter);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 000000000000..474395b93637
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,75 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCTARGETDESC_H
+#define PPCMCTARGETDESC_H
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+class raw_ostream;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+extern Target ThePPC64LETarget;
+
+MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+
+MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
+
+/// createPPCELFObjectWriter - Construct an PPC ELF object writer.
+MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ bool IsLittleEndian,
+ uint8_t OSABI);
+/// createPPCELFObjectWriter - Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+} // End llvm namespace
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
new file mode 100644
index 000000000000..cff27baeb5ee
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue);
+
+ void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) override {
+ if (Writer->is64Bit()) {
+ report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
+ } else
+ RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+/// computes the log2 of the size of the relocation,
+/// used for relocation_info::r_length.
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ report_fatal_error("log2size(FixupKind): Unhandled fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return 0;
+ case FK_PCRel_2:
+ case FK_Data_2:
+ return 1;
+ case FK_PCRel_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_br24:
+ case FK_Data_4:
+ return 2;
+ case FK_PCRel_8:
+ case FK_Data_8:
+ return 3;
+ }
+ return 0;
+}
+
+/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
+/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+static unsigned getRelocType(const MCValue &Target,
+ const MCFixupKind FixupKind, // from
+ // Fixup.getKind()
+ const bool IsPCRel) {
+ const MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+ // determine the type of the relocation
+ unsigned Type = MachO::GENERIC_RELOC_VANILLA;
+ if (IsPCRel) { // relative to PC
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (relative)");
+ case PPC::fixup_ppc_br24:
+ Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24
+ break;
+ case PPC::fixup_ppc_brcond14:
+ Type = MachO::PPC_RELOC_BR14;
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (absolute)!");
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16_SECTDIFF;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ break;
+ case FK_Data_2:
+ break;
+ }
+ }
+ return Type;
+}
+
+static void makeRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t FixupOffset, const uint32_t Index,
+ const unsigned IsPCRel, const unsigned Log2Size,
+ const unsigned IsExtern, const unsigned Type) {
+ MRE.r_word0 = FixupOffset;
+ // The bitfield offsets that work (as determined by trial-and-error)
+ // are different than what is documented in the mach-o manuals.
+ // This appears to be an endianness issue; reversing the order of the
+ // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+ // breaks x86/ARM assembly).
+ MRE.r_word1 = ((Index << 8) | // was << 0
+ (IsPCRel << 7) | // was << 24
+ (Log2Size << 5) | // was << 25
+ (IsExtern << 4) | // was << 27
+ (Type << 0)); // was << 28
+}
+
+static void
+makeScatteredRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t Addr, const unsigned Type,
+ const unsigned Log2Size, const unsigned IsPCRel,
+ const uint32_t Value2) {
+ // For notes on bitfield positions and endianness, see:
+ // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
+ MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) |
+ (IsPCRel << 30) | MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
+}
+
+/// Compute fixup offset (address).
+static uint32_t getFixupOffset(const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ // On Mach-O, ppc_fixup_half16 relocations must refer to the
+ // start of the instruction, not the second halfword, as ELF does
+ if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+ FixupOffset &= ~uint32_t(3);
+ return FixupOffset;
+}
+
+/// \return false if falling back to using non-scattered relocation,
+/// otherwise true for normal scattered relocation.
+/// based on X86MachObjectWriter::RecordScatteredRelocation
+/// and ARMMachObjectWriter::RecordScatteredRelocation
+bool PPCMachObjectWriter::RecordScatteredRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue) {
+ // caller already computes these, can we just pass and reuse?
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ const MCFixupKind FK = Fixup.getKind();
+ const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned Type = getRelocType(Target, FK, IsPCRel);
+
+ // Is this a local or SECTDIFF relocation entry?
+ // SECTDIFF relocation entries have symbol subtractions,
+ // and require two entries, the first for the add-symbol value,
+ // the second for the subtract-symbol value.
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // FIXME: is Type correct? see include/llvm/Support/MachO.h
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+ // FIXME: does FixedValue get used??
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == MachO::PPC_RELOC_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
+ // X86 had this piece, but ARM does not
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") +
+ Buffer + ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
+ // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
+ // see PPCMCExpr::EvaluateAsRelocatableImpl()
+ uint32_t other_half = 0;
+ switch (Type) {
+ case MachO::PPC_RELOC_LO16_SECTDIFF:
+ other_half = (FixedValue >> 16) & 0xffff;
+ // applyFixupOffset longer extracts the high part because it now assumes
+ // this was already done.
+ // It looks like this is not true for the FixedValue needed with Mach-O
+ // relocs.
+ // So we need to adjust FixedValue again here.
+ FixedValue &= 0xffff;
+ break;
+ case MachO::PPC_RELOC_HA16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue =
+ ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff;
+ break;
+ case MachO::PPC_RELOC_HI16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue = (FixedValue >> 16) & 0xffff;
+ break;
+ default:
+ llvm_unreachable("Invalid PPC scattered relocation type.");
+ break;
+ }
+
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
+ Log2Size, IsPCRel, Value2);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
+ }
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
+}
+
+// see PPCELFObjectWriter for a general outline of cases
+void PPCMachObjectWriter::RecordPPCRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ const MCFixupKind FK = Fixup.getKind(); // unsigned
+ const unsigned Log2Size = getFixupKindLog2Size(FK);
+ const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned RelocType = getRelocType(Target, FK, IsPCRel);
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB() &&
+ // Q: are branch targets ever scattered?
+ RelocType != MachO::PPC_RELOC_BR24 &&
+ RelocType != MachO::PPC_RELOC_BR14) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ Log2Size, FixedValue);
+ return;
+ }
+
+ // this doesn't seem right for RIT_PPC_BR24
+ // Get the symbol data, if any.
+ const MCSymbolData *SD = nullptr;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // See <reloc.h>.
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = RelocType;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ // the above line stolen from ARM, not sure
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD =
+ Asm.getSectionData(SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+ }
+
+ // struct relocation_info (8 bytes)
+ MachO::any_relocation_info MRE;
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
+ Type);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(
+ new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
+ /*IsLittleEndian=*/false);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
new file mode 100644
index 000000000000..c2987b641c04
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -0,0 +1,86 @@
+//===-- PPCPredicates.cpp - PPC Branch Predicate Information --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCPredicates.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+using namespace llvm;
+
+PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ case PPC::PRED_EQ: return PPC::PRED_NE;
+ case PPC::PRED_NE: return PPC::PRED_EQ;
+ case PPC::PRED_LT: return PPC::PRED_GE;
+ case PPC::PRED_GE: return PPC::PRED_LT;
+ case PPC::PRED_GT: return PPC::PRED_LE;
+ case PPC::PRED_LE: return PPC::PRED_GT;
+ case PPC::PRED_NU: return PPC::PRED_UN;
+ case PPC::PRED_UN: return PPC::PRED_NU;
+ case PPC::PRED_EQ_MINUS: return PPC::PRED_NE_PLUS;
+ case PPC::PRED_NE_MINUS: return PPC::PRED_EQ_PLUS;
+ case PPC::PRED_LT_MINUS: return PPC::PRED_GE_PLUS;
+ case PPC::PRED_GE_MINUS: return PPC::PRED_LT_PLUS;
+ case PPC::PRED_GT_MINUS: return PPC::PRED_LE_PLUS;
+ case PPC::PRED_LE_MINUS: return PPC::PRED_GT_PLUS;
+ case PPC::PRED_NU_MINUS: return PPC::PRED_UN_PLUS;
+ case PPC::PRED_UN_MINUS: return PPC::PRED_NU_PLUS;
+ case PPC::PRED_EQ_PLUS: return PPC::PRED_NE_MINUS;
+ case PPC::PRED_NE_PLUS: return PPC::PRED_EQ_MINUS;
+ case PPC::PRED_LT_PLUS: return PPC::PRED_GE_MINUS;
+ case PPC::PRED_GE_PLUS: return PPC::PRED_LT_MINUS;
+ case PPC::PRED_GT_PLUS: return PPC::PRED_LE_MINUS;
+ case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS;
+ case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS;
+ case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS;
+
+ // Simple predicates for single condition-register bits.
+ case PPC::PRED_BIT_SET: return PPC::PRED_BIT_UNSET;
+ case PPC::PRED_BIT_UNSET: return PPC::PRED_BIT_SET;
+ }
+ llvm_unreachable("Unknown PPC branch opcode!");
+}
+
+PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) {
+ switch (Opcode) {
+ case PPC::PRED_EQ: return PPC::PRED_EQ;
+ case PPC::PRED_NE: return PPC::PRED_NE;
+ case PPC::PRED_LT: return PPC::PRED_GT;
+ case PPC::PRED_GE: return PPC::PRED_LE;
+ case PPC::PRED_GT: return PPC::PRED_LT;
+ case PPC::PRED_LE: return PPC::PRED_GE;
+ case PPC::PRED_NU: return PPC::PRED_NU;
+ case PPC::PRED_UN: return PPC::PRED_UN;
+ case PPC::PRED_EQ_MINUS: return PPC::PRED_EQ_MINUS;
+ case PPC::PRED_NE_MINUS: return PPC::PRED_NE_MINUS;
+ case PPC::PRED_LT_MINUS: return PPC::PRED_GT_MINUS;
+ case PPC::PRED_GE_MINUS: return PPC::PRED_LE_MINUS;
+ case PPC::PRED_GT_MINUS: return PPC::PRED_LT_MINUS;
+ case PPC::PRED_LE_MINUS: return PPC::PRED_GE_MINUS;
+ case PPC::PRED_NU_MINUS: return PPC::PRED_NU_MINUS;
+ case PPC::PRED_UN_MINUS: return PPC::PRED_UN_MINUS;
+ case PPC::PRED_EQ_PLUS: return PPC::PRED_EQ_PLUS;
+ case PPC::PRED_NE_PLUS: return PPC::PRED_NE_PLUS;
+ case PPC::PRED_LT_PLUS: return PPC::PRED_GT_PLUS;
+ case PPC::PRED_GE_PLUS: return PPC::PRED_LE_PLUS;
+ case PPC::PRED_GT_PLUS: return PPC::PRED_LT_PLUS;
+ case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS;
+ case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS;
+ case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS;
+
+ case PPC::PRED_BIT_SET:
+ case PPC::PRED_BIT_UNSET:
+ llvm_unreachable("Invalid use of bit predicate code");
+ }
+ llvm_unreachable("Unknown PPC branch opcode!");
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
new file mode 100644
index 000000000000..10e328a8116e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -0,0 +1,68 @@
+//===-- PPCPredicates.h - PPC Branch Predicate Information ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC branch predicates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPCPREDICATES_H
+#define LLVM_TARGET_POWERPC_PPCPREDICATES_H
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+// Generated files will use "namespace PPC". To avoid symbol clash,
+// undefine PPC here. PPC may be predefined on some hosts.
+#undef PPC
+
+namespace llvm {
+namespace PPC {
+ /// Predicate - These are "(BI << 5) | BO" for various predicates.
+ enum Predicate {
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4,
+ PRED_LT_MINUS = (0 << 5) | 14,
+ PRED_LE_MINUS = (1 << 5) | 6,
+ PRED_EQ_MINUS = (2 << 5) | 14,
+ PRED_GE_MINUS = (0 << 5) | 6,
+ PRED_GT_MINUS = (1 << 5) | 14,
+ PRED_NE_MINUS = (2 << 5) | 6,
+ PRED_UN_MINUS = (3 << 5) | 14,
+ PRED_NU_MINUS = (3 << 5) | 6,
+ PRED_LT_PLUS = (0 << 5) | 15,
+ PRED_LE_PLUS = (1 << 5) | 7,
+ PRED_EQ_PLUS = (2 << 5) | 15,
+ PRED_GE_PLUS = (0 << 5) | 7,
+ PRED_GT_PLUS = (1 << 5) | 15,
+ PRED_NE_PLUS = (2 << 5) | 7,
+ PRED_UN_PLUS = (3 << 5) | 15,
+ PRED_NU_PLUS = (3 << 5) | 7,
+
+ // When dealing with individual condition-register bits, we have simple set
+ // and unset predicates.
+ PRED_BIT_SET = 1024,
+ PRED_BIT_UNSET = 1025
+ };
+
+ /// Invert the specified predicate. != -> ==, < -> >=.
+ Predicate InvertPredicate(Predicate Opcode);
+
+ /// Assume the condition register is set by MI(a,b), return the predicate if
+ /// we modify the instructions such that condition register is set by MI(b,a).
+ Predicate getSwappedPredicate(Predicate Opcode);
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
new file mode 100644
index 000000000000..ba5fa4f79b4e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -0,0 +1,105 @@
+//===-- PPC.h - Top-level interface for PowerPC Target ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// PowerPC back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_H
+#define LLVM_TARGET_POWERPC_H
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include <string>
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+ class PPCTargetMachine;
+ class PassRegistry;
+ class FunctionPass;
+ class ImmutablePass;
+ class JITCodeEmitter;
+ class MachineInstr;
+ class AsmPrinter;
+ class MCInst;
+
+ FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
+#ifndef NDEBUG
+ FunctionPass *createPPCCTRLoopsVerify();
+#endif
+ FunctionPass *createPPCEarlyReturnPass();
+ FunctionPass *createPPCVSXCopyPass();
+ FunctionPass *createPPCVSXCopyCleanupPass();
+ FunctionPass *createPPCVSXFMAMutatePass();
+ FunctionPass *createPPCBranchSelectionPass();
+ FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
+ FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &MCE);
+ void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin);
+
+ /// \brief Creates an PPC-specific Target Transformation Info pass.
+ ImmutablePass *createPPCTargetTransformInfoPass(const PPCTargetMachine *TM);
+
+ void initializePPCVSXFMAMutatePass(PassRegistry&);
+ extern char &PPCVSXFMAMutateID;
+
+ namespace PPCII {
+
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // PPC Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_PLT_OR_STUB - On a symbol operand "FOO", this indicates that the
+ /// reference is actually to the "FOO$stub" or "FOO@plt" symbol. This is
+ /// used for calls and jumps to external functions on Tiger and earlier, and
+ /// for PIC calls on Linux and ELF systems.
+ MO_PLT_OR_STUB = 1,
+
+ /// MO_PIC_FLAG - If this bit is set, the symbol reference is relative to
+ /// the function's picbase, e.g. lo16(symbol-picbase).
+ MO_PIC_FLAG = 2,
+
+ /// MO_NLP_FLAG - If this bit is set, the symbol reference is actually to
+ /// the non_lazy_ptr for the global, e.g. lo16(symbol$non_lazy_ptr-picbase).
+ MO_NLP_FLAG = 4,
+
+ /// MO_NLP_HIDDEN_FLAG - If this bit is set, the symbol reference is to a
+ /// symbol with hidden visibility. This causes a different kind of
+ /// non-lazy-pointer to be generated.
+ MO_NLP_HIDDEN_FLAG = 8,
+
+ /// The next are not flags but distinct values.
+ MO_ACCESS_MASK = 0xf0,
+
+ /// MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
+ MO_LO = 1 << 4,
+ MO_HA = 2 << 4,
+
+ MO_TPREL_LO = 4 << 4,
+ MO_TPREL_HA = 3 << 4,
+
+ /// These values identify relocations on immediates folded
+ /// into memory operations.
+ MO_DTPREL_LO = 5 << 4,
+ MO_TLSLD_LO = 6 << 4,
+ MO_TOC_LO = 7 << 4,
+
+ // Symbol for VK_PPC_TLS fixup attached to an ADD instruction
+ MO_TLS = 8 << 4
+ };
+ } // end namespace PPCII
+
+} // end namespace llvm;
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
new file mode 100644
index 000000000000..a9842b287cbb
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -0,0 +1,344 @@
+//===-- PPC.td - Describe the PowerPC Target Machine -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC Subtarget features.
+//
+
+//===----------------------------------------------------------------------===//
+// CPU Directives //
+//===----------------------------------------------------------------------===//
+
+def Directive440 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_440", "">;
+def Directive601 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_601", "">;
+def Directive602 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_602", "">;
+def Directive603 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive604 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive620 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_603", "">;
+def Directive7400: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_7400", "">;
+def Directive750 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_750", "">;
+def Directive970 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_970", "">;
+def Directive32 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_32", "">;
+def Directive64 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_64", "">;
+def DirectiveA2 : SubtargetFeature<"", "DarwinDirective", "PPC::DIR_A2", "">;
+def DirectiveE500mc : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E500mc", "">;
+def DirectiveE5500 : SubtargetFeature<"", "DarwinDirective",
+ "PPC::DIR_E5500", "">;
+def DirectivePwr3: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR3", "">;
+def DirectivePwr4: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR4", "">;
+def DirectivePwr5: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5", "">;
+def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "">;
+def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">;
+def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">;
+def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">;
+def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">;
+
+def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true",
+ "Enable 64-bit instructions">;
+def Feature64BitRegs : SubtargetFeature<"64bitregs","Use64BitRegs", "true",
+ "Enable 64-bit registers usage for ppc32 [beta]">;
+def FeatureCRBits : SubtargetFeature<"crbits", "UseCRBits", "true",
+ "Use condition-register bits individually">;
+def FeatureAltivec : SubtargetFeature<"altivec","HasAltivec", "true",
+ "Enable Altivec instructions">;
+def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
+ "Enable the MFOCRF instruction">;
+def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
+ "Enable the fsqrt instruction">;
+def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
+ "Enable the fcpsgn instruction">;
+def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
+ "Enable the fre instruction">;
+def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
+ "Enable the fres instruction">;
+def FeatureFRSQRTE : SubtargetFeature<"frsqrte", "HasFRSQRTE", "true",
+ "Enable the frsqrte instruction">;
+def FeatureFRSQRTES : SubtargetFeature<"frsqrtes", "HasFRSQRTES", "true",
+ "Enable the frsqrtes instruction">;
+def FeatureRecipPrec : SubtargetFeature<"recipprec", "HasRecipPrec", "true",
+ "Assume higher precision reciprocal estimates">;
+def FeatureSTFIWX : SubtargetFeature<"stfiwx","HasSTFIWX", "true",
+ "Enable the stfiwx instruction">;
+def FeatureLFIWAX : SubtargetFeature<"lfiwax","HasLFIWAX", "true",
+ "Enable the lfiwax instruction">;
+def FeatureFPRND : SubtargetFeature<"fprnd", "HasFPRND", "true",
+ "Enable the fri[mnpz] instructions">;
+def FeatureFPCVT : SubtargetFeature<"fpcvt", "HasFPCVT", "true",
+ "Enable fc[ft]* (unsigned and single-precision) and lfiwzx instructions">;
+def FeatureISEL : SubtargetFeature<"isel","HasISEL", "true",
+ "Enable the isel instruction">;
+def FeaturePOPCNTD : SubtargetFeature<"popcntd","HasPOPCNTD", "true",
+ "Enable the popcnt[dw] instructions">;
+def FeatureLDBRX : SubtargetFeature<"ldbrx","HasLDBRX", "true",
+ "Enable the ldbrx instruction">;
+def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
+ "Enable Book E instructions">;
+def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
+ "Enable QPX instructions">;
+def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
+ "Enable VSX instructions",
+ [FeatureAltivec]>;
+
+def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
+ "Treat mftb as deprecated">;
+def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
+ "Treat vector data stream cache control instructions as deprecated">;
+
+// Note: Future features to add when support is extended to more
+// recent ISA levels:
+//
+// CMPB p6, p6x, p7 cmpb
+// DFP p6, p6x, p7 decimal floating-point instructions
+// POPCNTB p5 through p7 popcntb and related instructions
+// VSX p7 vector-scalar instruction set
+
+//===----------------------------------------------------------------------===//
+// Classes used for relation maps.
+//===----------------------------------------------------------------------===//
+// RecFormRel - Filter class used to relate non-record-form instructions with
+// their record-form variants.
+class RecFormRel;
+
+// AltVSXFMARel - Filter class used to relate the primary addend-killing VSX
+// FMA instruction forms with their corresponding factor-killing forms.
+class AltVSXFMARel {
+ bit IsVSXFMAAlt = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Relation Map Definitions.
+//===----------------------------------------------------------------------===//
+
+def getRecordFormOpcode : InstrMapping {
+ let FilterClass = "RecFormRel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName", "Interpretation64Bit"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["RC"];
+ // The key column are the non-record-form instructions.
+ let KeyCol = ["0"];
+ // Value columns RC=1
+ let ValueCols = [["1"]];
+}
+
+def getNonRecordFormOpcode : InstrMapping {
+ let FilterClass = "RecFormRel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName", "Interpretation64Bit"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["RC"];
+ // The key column are the record-form instructions.
+ let KeyCol = ["1"];
+ // Value columns are RC=0
+ let ValueCols = [["0"]];
+}
+
+def getAltVSXFMAOpcode : InstrMapping {
+ let FilterClass = "AltVSXFMARel";
+ // Instructions with the same BaseName and Interpretation64Bit values
+ // form a row.
+ let RowFields = ["BaseName"];
+ // Instructions with the same RC value form a column.
+ let ColFields = ["IsVSXFMAAlt"];
+ // The key column are the (default) addend-killing instructions.
+ let KeyCol = ["0"];
+ // Value columns IsVSXFMAAlt=1
+ let ValueCols = [["1"]];
+}
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "PPCRegisterInfo.td"
+include "PPCSchedule.td"
+include "PPCInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC processors supported.
+//
+
+def : Processor<"generic", G3Itineraries, [Directive32]>;
+def : ProcessorModel<"440", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE, DeprecatedMFTB]>;
+def : ProcessorModel<"450", PPC440Model, [Directive440, FeatureISEL,
+ FeatureFRES, FeatureFRSQRTE,
+ FeatureBookE, DeprecatedMFTB]>;
+def : Processor<"601", G3Itineraries, [Directive601]>;
+def : Processor<"602", G3Itineraries, [Directive602]>;
+def : Processor<"603", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603e", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"603ev", G3Itineraries, [Directive603,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"604e", G3Itineraries, [Directive604,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"620", G3Itineraries, [Directive620,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"750", G4Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g3", G3Itineraries, [Directive750,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7400", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4", G4Itineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"7450", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : Processor<"g4+", G4PlusItineraries, [Directive7400, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE]>;
+def : ProcessorModel<"970", G5Model,
+ [Directive970, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt,
+ FeatureFRES, FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"g5", G5Model,
+ [Directive970, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
+ FeatureFRES, FeatureFRSQRTE,
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"e500mc", PPCE500mcModel,
+ [DirectiveE500mc, FeatureMFOCRF,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
+def : ProcessorModel<"e5500", PPCE5500Model,
+ [DirectiveE5500, FeatureMFOCRF, Feature64Bit,
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
+def : ProcessorModel<"a2", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, DeprecatedMFTB]>;
+def : ProcessorModel<"a2q", PPCA2Model,
+ [DirectiveA2, FeatureBookE, FeatureMFOCRF,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
+ /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
+def : ProcessorModel<"pwr3", G5Model,
+ [DirectivePwr3, FeatureAltivec,
+ FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr4", G5Model,
+ [DirectivePwr4, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRES, FeatureFRSQRTE,
+ FeatureSTFIWX, Feature64Bit]>;
+def : ProcessorModel<"pwr5", G5Model,
+ [DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr5x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr6", G5Model,
+ [DirectivePwr6, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr6x", G5Model,
+ [DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
+ FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr7", P7Model,
+ [DirectivePwr7, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
+def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */,
+ [DirectivePwr8, FeatureAltivec,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
+ FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
+ FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
+ FeatureFPRND, FeatureFPCVT, FeatureISEL,
+ FeaturePOPCNTD, FeatureLDBRX,
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
+def : Processor<"ppc", G3Itineraries, [Directive32]>;
+def : ProcessorModel<"ppc64", G5Model,
+ [Directive64, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"ppc64le", G5Model,
+ [Directive64, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
+
+//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PPCCallingConv.td"
+
+def PPCInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+
+ // FIXME: Unset this when no longer needed!
+ let decodePositionallyEncodedOperands = 1;
+
+ let noNamedPositionallyEncodedOperands = 1;
+}
+
+def PPCAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 0;
+}
+
+def PPCAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // We do not use hard coded registers in asm strings. However, some
+ // InstAlias definitions use immediate literals. Set RegisterPrefix
+ // so that those are not misinterpreted as registers.
+ string RegisterPrefix = "%";
+}
+
+def PPC : Target {
+ // Information about the instructions.
+ let InstructionSet = PPCInstrInfo;
+
+ let AssemblyParsers = [PPCAsmParser];
+ let AssemblyParserVariants = [PPCAsmParserVariant];
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
new file mode 100644
index 000000000000..138402241957
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -0,0 +1,1397 @@
+//===-- PPCAsmPrinter.cpp - Print machine instrs to PowerPC assembly ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to PowerPC assembly language. This printer is
+// the output mechanism used by `llc'.
+//
+// Documentation at http://developer.apple.com/documentation/DeveloperTools/
+// Reference/Assembler/ASMIntroduction/chapter_1_section_1.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "InstPrinter/PPCInstPrinter.h"
+#include "PPCMachineFunctionInfo.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "PPCTargetStreamer.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "asmprinter"
+
+namespace {
+ class PPCAsmPrinter : public AsmPrinter {
+ protected:
+ MapVector<MCSymbol*, MCSymbol*> TOC;
+ const PPCSubtarget &Subtarget;
+ uint64_t TOCLabelID;
+ public:
+ explicit PPCAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
+
+ const char *getPassName() const override {
+ return "PowerPC Assembly Printer";
+ }
+
+ MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
+
+ void EmitInstruction(const MachineInstr *MI) override;
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+ };
+
+ /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
+ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const override {
+ return "Linux PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
+
+ void EmitFunctionEntryLabel() override;
+
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ };
+
+ /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
+ /// OS X
+ class PPCDarwinAsmPrinter : public PPCAsmPrinter {
+ public:
+ explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : PPCAsmPrinter(TM, Streamer) {}
+
+ const char *getPassName() const override {
+ return "Darwin PPC Assembly Printer";
+ }
+
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
+
+ void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
+ };
+} // end of anonymous namespace
+
+/// stripRegisterPrefix - This method strips the character prefix from a
+/// register name so that only the number is left. Used by for linux asm.
+static const char *stripRegisterPrefix(const char *RegName) {
+ switch (RegName[0]) {
+ case 'r':
+ case 'f':
+ case 'v':
+ if (RegName[1] == 's')
+ return RegName + 2;
+ return RegName + 1;
+ case 'c': if (RegName[1] == 'r') return RegName + 2;
+ }
+
+ return RegName;
+}
+
+void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const DataLayout *DL = TM.getDataLayout();
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register: {
+ const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+ // Linux assembler (Others?) does not take register mnemonics.
+ // FIXME - What about special registers used in mfspr/mtspr?
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName;
+ return;
+ }
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << DL->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_BlockAddress:
+ O << *GetBlockAddressSymbol(MO.getBlockAddress());
+ return;
+ case MachineOperand::MO_GlobalAddress: {
+ // Computing the address of a global symbol, not calling it.
+ const GlobalValue *GV = MO.getGlobal();
+ MCSymbol *SymToPrint;
+
+ // External or weakly linked global variables need non-lazily-resolved stubs
+ if (TM.getRelocationModel() != Reloc::Static &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) {
+ if (!GV->hasHiddenVisibility()) {
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>()
+ .getGVStubEntry(SymToPrint);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
+ } else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage()) {
+ SymToPrint = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>().
+ getHiddenGVStubEntry(SymToPrint);
+ if (!StubSym.getPointer())
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
+ } else {
+ SymToPrint = getSymbol(GV);
+ }
+ } else {
+ SymToPrint = getSymbol(GV);
+ }
+
+ O << *SymToPrint;
+
+ printOffset(MO.getOffset(), O);
+ return;
+ }
+
+ default:
+ O << "<unknown operand type: " << (unsigned)MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool PPCAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ // See if this is a generic print operand
+ return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
+ case 'c': // Don't print "$" before a global var name or constant.
+ break; // PPC never has a prefix.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ case 'I':
+ // Write 'i' if an integer constant, otherwise nothing. Used to print
+ // addi vs add, etc.
+ if (MI->getOperand(OpNo).isImm())
+ O << "i";
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+// At the moment, all inline asm memory operands are a single register.
+// In any case, the output of this routine should always be just one
+// assembler operand.
+
+bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'y': // A memory reference for an X-form instruction
+ {
+ const char *RegName = "r0";
+ if (!Subtarget.isDarwin()) RegName = stripRegisterPrefix(RegName);
+ O << RegName << ", ";
+ printOperand(MI, OpNo, O);
+ return false;
+ }
+ }
+ }
+
+ assert(MI->getOperand(OpNo).isReg());
+ O << "0(";
+ printOperand(MI, OpNo, O);
+ O << ")";
+ return false;
+}
+
+
+/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
+/// exists for it. If not, create one. Then return a symbol that references
+/// the TOC entry.
+MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
+ const DataLayout *DL = TM.getDataLayout();
+ MCSymbol *&TOCEntry = TOC[Sym];
+
+ // To avoid name clash check if the name already exists.
+ while (!TOCEntry) {
+ if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
+ "C" + Twine(TOCLabelID++)) == nullptr) {
+ TOCEntry = GetTempSymbol("C", TOCLabelID);
+ }
+ }
+
+ return TOCEntry;
+}
+
+
+/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
+/// the current output stream.
+///
+void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ MCInst TmpInst;
+ bool isPPC64 = Subtarget.isPPC64();
+
+ // Lower multi-instruction pseudo operations.
+ switch (MI->getOpcode()) {
+ default: break;
+ case TargetOpcode::DBG_VALUE:
+ llvm_unreachable("Should be handled target independently");
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8: {
+ // Transform %LR = MovePCtoLR
+ // Into this, where the label is the PIC base:
+ // bl L1$pb
+ // L1$pb:
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+
+ // Emit the 'bl'.
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext)));
+
+ // Emit the label.
+ OutStreamer.EmitLabel(PICBase);
+ return;
+ }
+ case PPC::GetGBRO: {
+ // Get the offset from the GOT Base Register to the GOT
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ MCSymbol *PICOffset = MF->getInfo<PPCFunctionInfo>()->getPICOffsetSymbol();
+ TmpInst.setOpcode(PPC::LWZ);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(PICOffset, MCSymbolRefExpr::VK_None, OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(MF->getPICBaseSymbol(),
+ MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCOperand MO = TmpInst.getOperand(1);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(MCBinaryExpr::CreateSub(Exp,
+ PB,
+ OutContext));
+ TmpInst.addOperand(MO);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::UpdateGBR: {
+ // Update the GOT Base Register to point to the GOT. It may be possible to
+ // merge this with the PPC::GetGBRO, doing it all in one step.
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ TmpInst.setOpcode(PPC::ADD4);
+ TmpInst.addOperand(TmpInst.getOperand(0));
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LWZtoc: {
+ // Transform %X3 = LWZtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LWZ, and the global address operand to be a
+ // reference to the GOT entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LWZ);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_None,
+ OutContext);
+ const MCExpr *PB =
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")),
+ OutContext);
+ Exp = MCBinaryExpr::CreateSub(Exp, PB, OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LDtocJTI:
+ case PPC::LDtocCPT:
+ case PPC::LDtoc: {
+ // Transform %X3 = LDtoc <ga:@min1>, %X2
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD, and the global address operand to be a
+ // reference to the TOC entry we will synthesize later.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+
+ // Map symbol -> label of TOC entry
+ assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
+ MCSymbol *MOSymbol = nullptr;
+ if (MO.isGlobal())
+ MOSymbol = getSymbol(MO.getGlobal());
+ else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+
+ case PPC::ADDIStocHA: {
+ // Transform %Xd = ADDIStocHA %X2, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDIS8. If the global address is external, has
+ // common linkage, is a non-local function address, or is a jump table
+ // address, then generate a TOC entry and reference that. Otherwise
+ // reference the symbol directly.
+ TmpInst.setOpcode(PPC::ADDIS8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
+ "Invalid operand for ADDIStocHA!");
+ MCSymbol *MOSymbol = nullptr;
+ bool IsExternal = false;
+ bool IsNonLocalFunction = false;
+ bool IsCommon = false;
+ bool IsAvailExt = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ IsExternal = GV->isDeclaration();
+ IsCommon = GV->hasCommonLinkage();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
+ IsAvailExt = GV->hasAvailableExternallyLinkage();
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ else if (MO.isJTI())
+ MOSymbol = GetJTISymbol(MO.getIndex());
+
+ if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt ||
+ MO.isJTI() || TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::LDtocL: {
+ // Transform %Xd = LDtocL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD. If the global address is external, has
+ // common linkage, or is a jump table address, then reference the
+ // associated TOC entry. Otherwise reference the symbol directly.
+ TmpInst.setOpcode(PPC::LD);
+ const MachineOperand &MO = MI->getOperand(1);
+ assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
+ "Invalid operand for LDtocL!");
+ MCSymbol *MOSymbol = nullptr;
+
+ if (MO.isJTI())
+ MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
+ else if (MO.isCPI()) {
+ MOSymbol = GetCPISymbol(MO.getIndex());
+ if (TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+ else if (MO.isGlobal()) {
+ const GlobalValue *GValue = MO.getGlobal();
+ MOSymbol = getSymbol(GValue);
+ if (GValue->getType()->getElementType()->isFunctionTy() ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage() ||
+ TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::ADDItocL: {
+ // Transform %Xd = ADDItocL %Xs, <ga:@sym>
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to ADDI8. If the global address is external, then
+ // generate a TOC entry and reference that. Otherwise reference the
+ // symbol directly.
+ TmpInst.setOpcode(PPC::ADDI8);
+ const MachineOperand &MO = MI->getOperand(2);
+ assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
+ MCSymbol *MOSymbol = nullptr;
+ bool IsExternal = false;
+ bool IsNonLocalFunction = false;
+
+ if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ IsExternal = GV->isDeclaration();
+ IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker());
+ } else if (MO.isCPI())
+ MOSymbol = GetCPISymbol(MO.getIndex());
+
+ if (IsNonLocalFunction || IsExternal ||
+ TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
+ OutContext);
+ TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case PPC::ADDISgotTprelHA: {
+ // Transform: %Xd = ADDISgotTprelHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
+ OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTprel));
+ return;
+ }
+ case PPC::LDgotTprelL:
+ case PPC::LDgotTprelL32: {
+ // Transform %Xd = LDgotTprelL <ga:@sym>, %Xs
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+
+ // Change the opcode to LD.
+ TmpInst.setOpcode(isPPC64 ? PPC::LD : PPC::LWZ);
+ const MachineOperand &MO = MI->getOperand(1);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *Exp =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
+ OutContext);
+ TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+
+ case PPC::PPC32PICGOT: {
+ MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ MCSymbol *GOTRef = OutContext.CreateTempSymbol();
+ MCSymbol *NextInstr = OutContext.CreateTempSymbol();
+
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BL)
+ // FIXME: We would like an efficient form for this, so we don't have to do
+ // a lot of extra uniquing.
+ .addExpr(MCSymbolRefExpr::Create(NextInstr, OutContext)));
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(GOTSymbol, OutContext),
+ MCSymbolRefExpr::Create(GOTRef, OutContext),
+ OutContext);
+ OutStreamer.EmitLabel(GOTRef);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(NextInstr);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LWZ)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0)
+ .addReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADD4)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+ case PPC::PPC32GOT: {
+ MCSymbol *GOTSymbol = OutContext.GetOrCreateSymbol(StringRef("_GLOBAL_OFFSET_TABLE_"));
+ const MCExpr *SymGotTlsL =
+ MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_LO,
+ OutContext);
+ const MCExpr *SymGotTlsHA =
+ MCSymbolRefExpr::Create(GOTSymbol, MCSymbolRefExpr::VK_PPC_HA,
+ OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LI)
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsL));
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(0).getReg())
+ .addExpr(SymGotTlsHA));
+ return;
+ }
+ case PPC::ADDIStlsgdHA: {
+ // Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
+ OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::ADDItlsgdL:
+ // Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsgd@l
+ case PPC::ADDItlsgdL32: {
+ // Transform: %Rd = ADDItlsgdL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsgd
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsGD =
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSGD,
+ OutContext);
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsGD));
+ return;
+ }
+ case PPC::GETtlsADDR:
+ // Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
+ case PPC::GETtlsADDR32: {
+ // Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+
+ if (!Subtarget.isPPC64() && !Subtarget.isDarwin() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ Kind = MCSymbolRefExpr::VK_PLT;
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
+ OutContext);
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ?
+ PPC::BL8_NOP_TLS : PPC::BL_TLS)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDIStlsldHA: {
+ // Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
+ assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
+ OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS8)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(PPC::X2)
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::ADDItlsldL:
+ // Transform: %Xd = ADDItlsldL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@got@tlsld@l
+ case PPC::ADDItlsldL32: {
+ // Transform: %Rd = ADDItlsldL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@got@tlsld
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymGotTlsLD =
+ MCSymbolRefExpr::Create(MOSymbol, Subtarget.isPPC64() ?
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO :
+ MCSymbolRefExpr::VK_PPC_GOT_TLSLD,
+ OutContext);
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymGotTlsLD));
+ return;
+ }
+ case PPC::GETtlsldADDR:
+ // Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
+ // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
+ case PPC::GETtlsldADDR32: {
+ // Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
+ // Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT
+
+ StringRef Name = "__tls_get_addr";
+ MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
+
+ if (!Subtarget.isPPC64() && !Subtarget.isDarwin() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ Kind = MCSymbolRefExpr::VK_PLT;
+
+ const MCSymbolRefExpr *TlsRef =
+ MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymVar =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
+ OutContext);
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ?
+ PPC::BL8_NOP_TLS : PPC::BL_TLS)
+ .addExpr(TlsRef)
+ .addExpr(SymVar));
+ return;
+ }
+ case PPC::ADDISdtprelHA:
+ // Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
+ // Into: %Xd = ADDIS8 %X3, sym@dtprel@ha
+ case PPC::ADDISdtprelHA32: {
+ // Transform: %Rd = ADDISdtprelHA32 %R3, <ga:@sym>
+ // Into: %Rd = ADDIS %R3, sym@dtprel@ha
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
+ OutContext);
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDIS8 : PPC::ADDIS)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(Subtarget.isPPC64() ? PPC::X3 : PPC::R3)
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::ADDIdtprelL:
+ // Transform: %Xd = ADDIdtprelL %Xs, <ga:@sym>
+ // Into: %Xd = ADDI8 %Xs, sym@dtprel@l
+ case PPC::ADDIdtprelL32: {
+ // Transform: %Rd = ADDIdtprelL32 %Rs, <ga:@sym>
+ // Into: %Rd = ADDI %Rs, sym@dtprel@l
+ const MachineOperand &MO = MI->getOperand(2);
+ const GlobalValue *GValue = MO.getGlobal();
+ MCSymbol *MOSymbol = getSymbol(GValue);
+ const MCExpr *SymDtprel =
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
+ OutContext);
+ EmitToStreamer(OutStreamer,
+ MCInstBuilder(Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI)
+ .addReg(MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addExpr(SymDtprel));
+ return;
+ }
+ case PPC::MFOCRF:
+ case PPC::MFOCRF8:
+ if (!Subtarget.hasMFOCRF()) {
+ // Transform: %R3 = MFOCRF %CR7
+ // Into: %R3 = MFCR ;; cr7
+ unsigned NewOpcode =
+ MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+ break;
+ case PPC::MTOCRF:
+ case PPC::MTOCRF8:
+ if (!Subtarget.hasMFOCRF()) {
+ // Transform: %CR7 = MTOCRF %R3
+ // Into: MTCRF mask, %R3 ;; cr7
+ unsigned NewOpcode =
+ MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8;
+ unsigned Mask = 0x80 >> OutContext.getRegisterInfo()
+ ->getEncodingValue(MI->getOperand(0).getReg());
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, MCInstBuilder(NewOpcode)
+ .addImm(Mask)
+ .addReg(MI->getOperand(1).getReg()));
+ return;
+ }
+ break;
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::LWA_32:
+ case PPC::LWA: {
+ // Verify alignment is legal, so we don't create relocations
+ // that can't be supported.
+ // FIXME: This test is currently disabled for Darwin. The test
+ // suite shows a handful of test cases that fail this check for
+ // Darwin. Those need to be investigated before this sanity test
+ // can be enabled for those subtargets.
+ if (!Subtarget.isDarwin()) {
+ unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
+ llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
+ }
+ // Now process the instruction normally.
+ break;
+ }
+ }
+
+ LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
+ EmitToStreamer(OutStreamer, TmpInst);
+}
+
+void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (Subtarget.isELFv2ABI()) {
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitAbiVersion(2);
+ }
+
+ if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
+ return AsmPrinter::EmitStartOfAsmFile(M);
+
+ // FIXME: The use of .got2 assumes large GOT model (-fPIC), which is not
+ // optimal for some cases. We should consider supporting small model (-fpic)
+ // as well in the future.
+ assert(TM.getCodeModel() != CodeModel::Small &&
+ "Small code model PIC is currently unsupported.");
+ OutStreamer.SwitchSection(OutContext.getELFSection(".got2",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly()));
+
+ MCSymbol *TOCSym = OutContext.GetOrCreateSymbol(Twine(".L.TOC."));
+ MCSymbol *CurrentPos = OutContext.CreateTempSymbol();
+
+ OutStreamer.EmitLabel(CurrentPos);
+
+ // The GOT pointer points to the middle of the GOT, in order to reference the
+ // entire 64kB range. 0x8000 is the midpoint.
+ const MCExpr *tocExpr =
+ MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(CurrentPos, OutContext),
+ MCConstantExpr::Create(0x8000, OutContext),
+ OutContext);
+
+ OutStreamer.EmitAssignment(TOCSym, tocExpr);
+
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
+ // linux/ppc32 - Normal entry label.
+ if (!Subtarget.isPPC64() && TM.getRelocationModel() != Reloc::PIC_)
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ if (!Subtarget.isPPC64()) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+ if (PPCFI->usesPICBase()) {
+ MCSymbol *RelocSymbol = PPCFI->getPICOffsetSymbol();
+ MCSymbol *PICBase = MF->getPICBaseSymbol();
+ OutStreamer.EmitLabel(RelocSymbol);
+
+ const MCExpr *OffsExpr =
+ MCBinaryExpr::CreateSub(
+ MCSymbolRefExpr::Create(OutContext.GetOrCreateSymbol(Twine(".L.TOC.")),
+ OutContext),
+ MCSymbolRefExpr::Create(PICBase, OutContext),
+ OutContext);
+ OutStreamer.EmitValue(OffsExpr, 4);
+ OutStreamer.EmitLabel(CurrentFnSym);
+ return;
+ } else
+ return AsmPrinter::EmitFunctionEntryLabel();
+ }
+
+ // ELFv2 ABI - Normal entry label.
+ if (Subtarget.isELFv2ABI())
+ return AsmPrinter::EmitFunctionEntryLabel();
+
+ // Emit an official procedure descriptor.
+ MCSectionSubPair Current = OutStreamer.getCurrentSection();
+ const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
+ OutStreamer.EmitLabel(CurrentFnSym);
+ OutStreamer.EmitValueToAlignment(8);
+ MCSymbol *Symbol1 =
+ OutContext.GetOrCreateSymbol(".L." + Twine(CurrentFnSym->getName()));
+ // Generates a R_PPC64_ADDR64 (from FK_DATA_8) relocation for the function
+ // entry point.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol1, OutContext),
+ 8 /*size*/);
+ MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ // Generates a R_PPC64_TOC relocation for TOC base insertion.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
+ MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
+ 8/*size*/);
+ // Emit a null environment pointer.
+ OutStreamer.EmitIntValue(0, 8 /* size */);
+ OutStreamer.SwitchSection(Current.first, Current.second);
+
+ MCSymbol *RealFnSym = OutContext.GetOrCreateSymbol(
+ ".L." + Twine(CurrentFnSym->getName()));
+ OutStreamer.EmitLabel(RealFnSym);
+ CurrentFnSymForSize = RealFnSym;
+}
+
+
+bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
+ const DataLayout *TD = TM.getDataLayout();
+
+ bool isPPC64 = TD->getPointerSizeInBits() == 64;
+
+ PPCTargetStreamer &TS =
+ static_cast<PPCTargetStreamer &>(*OutStreamer.getTargetStreamer());
+
+ if (!TOC.empty()) {
+ const MCSectionELF *Section;
+
+ if (isPPC64)
+ Section = OutStreamer.getContext().getELFSection(".toc",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ else
+ Section = OutStreamer.getContext().getELFSection(".got2",
+ ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
+ SectionKind::getReadOnly());
+ OutStreamer.SwitchSection(Section);
+
+ for (MapVector<MCSymbol*, MCSymbol*>::iterator I = TOC.begin(),
+ E = TOC.end(); I != E; ++I) {
+ OutStreamer.EmitLabel(I->second);
+ MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
+ if (isPPC64)
+ TS.emitTCEntry(*S);
+ else
+ OutStreamer.EmitSymbolValue(S, 4);
+ }
+ }
+
+ MachineModuleInfoELF &MMIELF =
+ MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
+void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+ // In the ELFv2 ABI, in functions that use the TOC register, we need to
+ // provide two entry points. The ABI guarantees that when calling the
+ // local entry point, r2 is set up by the caller to contain the TOC base
+ // for this function, and when calling the global entry point, r12 is set
+ // up by the caller to hold the address of the global entry point. We
+ // thus emit a prefix sequence along the following lines:
+ //
+ // func:
+ // # global entry point
+ // addis r2,r12,(.TOC.-func)@ha
+ // addi r2,r2,(.TOC.-func)@l
+ // .localentry func, .-func
+ // # local entry point, followed by function body
+ //
+ // This ensures we have r2 set up correctly while executing the function
+ // body, no matter which entry point is called.
+ if (Subtarget.isELFv2ABI()
+ // Only do all that if the function uses r2 in the first place.
+ && !MF->getRegInfo().use_empty(PPC::X2)) {
+
+ MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(GlobalEntryLabel);
+ const MCSymbolRefExpr *GlobalEntryLabelExp =
+ MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
+
+ MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
+ GlobalEntryLabelExp, OutContext);
+
+ const MCExpr *TOCDeltaHi =
+ PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
+
+ const MCExpr *TOCDeltaLo =
+ PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
+
+ MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
+ OutStreamer.EmitLabel(LocalEntryLabel);
+ const MCSymbolRefExpr *LocalEntryLabelExp =
+ MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
+ const MCExpr *LocalOffsetExp =
+ MCBinaryExpr::CreateSub(LocalEntryLabelExp,
+ GlobalEntryLabelExp, OutContext);
+
+ PPCTargetStreamer *TS =
+ static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+ if (TS)
+ TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
+ }
+}
+
+/// EmitFunctionBodyEnd - Print the traceback table before the .size
+/// directive.
+///
+void PPCLinuxAsmPrinter::EmitFunctionBodyEnd() {
+ // Only the 64-bit target requires a traceback table. For now,
+ // we only emit the word of zeroes that GDB requires to find
+ // the end of the function, and zeroes for the eight-byte
+ // mandatory fields.
+ // FIXME: We should fill in the eight-byte mandatory fields as described in
+ // the PPC64 ELF ABI (this is a low-priority item because GDB does not
+ // currently make use of these fields).
+ if (Subtarget.isPPC64()) {
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ OutStreamer.EmitIntValue(0, 8/*size*/);
+ }
+}
+
+void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ static const char *const CPUDirectives[] = {
+ "",
+ "ppc",
+ "ppc440",
+ "ppc601",
+ "ppc602",
+ "ppc603",
+ "ppc7400",
+ "ppc750",
+ "ppc970",
+ "ppcA2",
+ "ppce500mc",
+ "ppce5500",
+ "power3",
+ "power4",
+ "power5",
+ "power5x",
+ "power6",
+ "power6x",
+ "power7",
+ "ppc64",
+ "ppc64le"
+ };
+
+ unsigned Directive = Subtarget.getDarwinDirective();
+ if (Subtarget.hasMFOCRF() && Directive < PPC::DIR_970)
+ Directive = PPC::DIR_970;
+ if (Subtarget.hasAltivec() && Directive < PPC::DIR_7400)
+ Directive = PPC::DIR_7400;
+ if (Subtarget.isPPC64() && Directive < PPC::DIR_64)
+ Directive = PPC::DIR_64;
+ assert(Directive <= PPC::DIR_64 && "Directive out of range.");
+
+ assert(Directive < array_lengthof(CPUDirectives) &&
+ "CPUDirectives[] might not be up-to-date!");
+ PPCTargetStreamer &TStreamer =
+ *static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+ TStreamer.emitMachine(CPUDirectives[Directive]);
+
+ // Prime text sections so they are adjacent. This reduces the likelihood a
+ // large data or debug section causes a branch to exceed 16M limit.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection());
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText()));
+ } else if (TM.getRelocationModel() == Reloc::DynamicNoPIC) {
+ OutStreamer.SwitchSection(
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText()));
+ }
+ OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
+}
+
+static MCSymbol *GetLazyPtr(MCSymbol *Sym, MCContext &Ctx) {
+ // Remove $stub suffix, add $lazy_ptr.
+ StringRef NoStub = Sym->getName().substr(0, Sym->getName().size()-5);
+ return Ctx.GetOrCreateSymbol(NoStub + "$lazy_ptr");
+}
+
+static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
+ // Add $tmp suffix to $stub, yielding $stub$tmp.
+ return Ctx.GetOrCreateSymbol(Sym->getName() + "$tmp");
+}
+
+void PPCDarwinAsmPrinter::
+EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isDarwin = Subtarget.isDarwin();
+
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+
+ // .lazy_symbol_pointer
+ const MCSection *LSPSection = TLOFMacho.getLazySymbolPointerSection();
+
+ // Output stubs for dynamically-linked functions
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT", "__picsymbolstub1",
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ 32, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ MCSymbol *AnonSymbol = GetAnonSym(Stub, OutContext);
+
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
+
+ // mflr r0
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
+ // bcl 20, 31, AnonSymbol
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCLalways).addExpr(Anon));
+ OutStreamer.EmitLabel(AnonSymbol);
+ // mflr r11
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
+ // addis r11, r11, ha16(LazyPtr - AnonSymbol)
+ const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::R11)
+ .addReg(PPC::R11)
+ .addExpr(SubHa16));
+ // mtlr r0
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
+
+ // ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ // lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(SubLo16).addExpr(SubLo16)
+ .addReg(PPC::R11));
+ // mtctr r12
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
+
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
+ }
+ OutStreamer.AddBlankLine();
+ return;
+ }
+
+ const MCSection *StubSection =
+ OutContext.getMachOSection("__TEXT","__symbol_stub1",
+ MachO::S_SYMBOL_STUBS |
+ MachO::S_ATTR_PURE_INSTRUCTIONS,
+ 16, SectionKind::getText());
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ MCSymbol *Stub = Stubs[i].first;
+ MCSymbol *RawSym = Stubs[i].second.getPointer();
+ MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
+
+ OutStreamer.SwitchSection(StubSection);
+ EmitAlignment(4);
+ OutStreamer.EmitLabel(Stub);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ // lis r11, ha16(LazyPtr)
+ const MCExpr *LazyPtrHa16 =
+ PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::LIS)
+ .addReg(PPC::R11)
+ .addExpr(LazyPtrHa16));
+
+ // ldu r12, lo16(LazyPtr)(r11)
+ // lwzu r12, lo16(LazyPtr)(r11)
+ const MCExpr *LazyPtrLo16 =
+ PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext);
+ EmitToStreamer(OutStreamer, MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
+ .addReg(PPC::R12)
+ .addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
+ .addReg(PPC::R11));
+
+ // mtctr r12
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
+ // bctr
+ EmitToStreamer(OutStreamer, MCInstBuilder(PPC::BCTR));
+
+ OutStreamer.SwitchSection(LSPSection);
+ OutStreamer.EmitLabel(LazyPtr);
+ OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
+ MCSymbol *DyldStubBindingHelper =
+ OutContext.GetOrCreateSymbol(StringRef("dyld_stub_binding_helper"));
+ if (isPPC64) {
+ // .quad dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 8);
+ } else {
+ // .long dyld_stub_binding_helper
+ OutStreamer.EmitSymbolValue(DyldStubBindingHelper, 4);
+ }
+ }
+
+ OutStreamer.AddBlankLine();
+}
+
+
+bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
+ bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+
+ // Darwin/PPC always uses mach-o.
+ const TargetLoweringObjectFileMachO &TLOFMacho =
+ static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetFnStubList();
+ if (!Stubs.empty())
+ EmitFunctionStubs(Stubs);
+
+ if (MAI->doesSupportExceptionHandling() && MMI) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (std::vector<const Function*>::const_iterator I = Personalities.begin(),
+ E = Personalities.end(); I != E; ++I) {
+ if (*I) {
+ MCSymbol *NLPSym = getSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
+ }
+ }
+ }
+
+ // Output stubs for dynamically-linked functions.
+ Stubs = MMIMacho.GetGVStubList();
+
+ // Output macho stubs for external and common global variables.
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ Stubs = MMIMacho.GetHiddenGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(Stubs[i].first);
+ // .long _foo
+ OutStreamer.EmitValue(MCSymbolRefExpr::
+ Create(Stubs[i].second.getPointer(),
+ OutContext),
+ isPPC64 ? 8 : 4/*size*/);
+ }
+
+ Stubs.clear();
+ OutStreamer.AddBlankLine();
+ }
+
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never generates
+ // code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return AsmPrinter::doFinalization(M);
+}
+
+/// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code
+/// for a MachineFunction to the given output stream, in a format that the
+/// Darwin assembler can deal with.
+///
+static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
+ MCStreamer &Streamer) {
+ const PPCSubtarget *Subtarget = &tm.getSubtarget<PPCSubtarget>();
+
+ if (Subtarget->isDarwin())
+ return new PPCDarwinAsmPrinter(tm, Streamer);
+ return new PPCLinuxAsmPrinter(tm, Streamer);
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
new file mode 100644
index 000000000000..ee906712ee02
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -0,0 +1,211 @@
+//===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that scans a machine function to determine which
+// conditional branches need more than 16 bits of displacement to reach their
+// target basic block. It does this in two passes; a calculation of basic block
+// positions pass, and a branch pseudo op to machine branch opcode pass. This
+// pass should be run last, just before the assembly printer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-branch-select"
+
+STATISTIC(NumExpanded, "Number of branches expanded to long format");
+
+namespace llvm {
+ void initializePPCBSelPass(PassRegistry&);
+}
+
+namespace {
+ struct PPCBSel : public MachineFunctionPass {
+ static char ID;
+ PPCBSel() : MachineFunctionPass(ID) {
+ initializePPCBSelPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// BlockSizes - The sizes of the basic blocks in the function.
+ std::vector<unsigned> BlockSizes;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ const char *getPassName() const override {
+ return "PowerPC Branch Selector";
+ }
+ };
+ char PPCBSel::ID = 0;
+}
+
+INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
+ false, false)
+
+/// createPPCBranchSelectionPass - returns an instance of the Branch Selection
+/// Pass
+///
+FunctionPass *llvm::createPPCBranchSelectionPass() {
+ return new PPCBSel();
+}
+
+bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
+ const PPCInstrInfo *TII =
+ static_cast<const PPCInstrInfo*>(Fn.getTarget().getInstrInfo());
+ // Give the blocks of the function a dense, in-order, numbering.
+ Fn.RenumberBlocks();
+ BlockSizes.resize(Fn.getNumBlockIDs());
+
+ // Measure each MBB and compute a size for the entire function.
+ unsigned FuncSize = 0;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+
+ unsigned BlockSize = 0;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end();
+ MBBI != EE; ++MBBI)
+ BlockSize += TII->GetInstSizeInBytes(MBBI);
+
+ BlockSizes[MBB->getNumber()] = BlockSize;
+ FuncSize += BlockSize;
+ }
+
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to shrink any branches in this function. This is a
+ // common case.
+ if (FuncSize < (1 << 15)) {
+ BlockSizes.clear();
+ return false;
+ }
+
+ // For each conditional branch, if the offset to its destination is larger
+ // than the offset field allows, transform it into a long branch sequence
+ // like this:
+ // short branch:
+ // bCC MBB
+ // long branch:
+ // b!CC $PC+8
+ // b MBB
+ //
+ bool MadeChange = true;
+ bool EverMadeChange = false;
+ while (MadeChange) {
+ // Iteratively expand branches until we reach a fixed point.
+ MadeChange = false;
+
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock &MBB = *MFI;
+ unsigned MBBStartOffset = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ MachineBasicBlock *Dest = nullptr;
+ if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
+ Dest = I->getOperand(2).getMBB();
+ else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
+ !I->getOperand(1).isImm())
+ Dest = I->getOperand(1).getMBB();
+ else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
+ I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) &&
+ !I->getOperand(0).isImm())
+ Dest = I->getOperand(0).getMBB();
+
+ if (!Dest) {
+ MBBStartOffset += TII->GetInstSizeInBytes(I);
+ continue;
+ }
+
+ // Determine the offset from the current branch to the destination
+ // block.
+ int BranchSize;
+ if (Dest->getNumber() <= MBB.getNumber()) {
+ // If this is a backwards branch, the delta is the offset from the
+ // start of this block to this branch, plus the sizes of all blocks
+ // from this block to the dest.
+ BranchSize = MBBStartOffset;
+
+ for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ } else {
+ // Otherwise, add the size of the blocks between this block and the
+ // dest to the number of bytes left in this block.
+ BranchSize = -MBBStartOffset;
+
+ for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
+ BranchSize += BlockSizes[i];
+ }
+
+ // If this branch is in range, ignore it.
+ if (isInt<16>(BranchSize)) {
+ MBBStartOffset += 4;
+ continue;
+ }
+
+ // Otherwise, we have to expand it to a long branch.
+ MachineInstr *OldBranch = I;
+ DebugLoc dl = OldBranch->getDebugLoc();
+
+ if (I->getOpcode() == PPC::BCC) {
+ // The BCC operands are:
+ // 0. PPC branch predicate
+ // 1. CR register
+ // 2. Target MBB
+ PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
+ unsigned CRReg = I->getOperand(1).getReg();
+
+ // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
+ BuildMI(MBB, I, dl, TII->get(PPC::BCC))
+ .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
+ } else if (I->getOpcode() == PPC::BC) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
+ } else if (I->getOpcode() == PPC::BCn) {
+ unsigned CRBit = I->getOperand(0).getReg();
+ BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDNZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
+ } else if (I->getOpcode() == PPC::BDZ8) {
+ BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
+ } else {
+ llvm_unreachable("Unhandled branch type!");
+ }
+
+ // Uncond branch to the real destination.
+ I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
+
+ // Remove the old branch from the function.
+ OldBranch->eraseFromParent();
+
+ // Remember that this instruction is 8-bytes, increase the size of the
+ // block by 4, remember to iterate.
+ BlockSizes[MBB.getNumber()] += 4;
+ MBBStartOffset += 8;
+ ++NumExpanded;
+ MadeChange = true;
+ }
+ }
+ EverMadeChange |= MadeChange;
+ }
+
+ BlockSizes.clear();
+ return true;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
new file mode 100644
index 000000000000..ec1e34d91f93
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -0,0 +1,663 @@
+//===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass identifies loops where we can generate the PPC branch instructions
+// that decrement and test the count register (CTR) (bdnz and friends).
+//
+// The pattern that defines the induction variable can changed depending on
+// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
+// normalizes induction variables, and the Loop Strength Reduction pass
+// run by 'llc' may also make changes to the induction variable.
+//
+// Criteria for CTR loops:
+// - Countable loops (w/ ind. var for a trip count)
+// - Try inner-most loops first
+// - No nested CTR loops.
+// - No function calls in loops.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar.h"
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#ifndef NDEBUG
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#endif
+
+#include <algorithm>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ctrloops"
+
+#ifndef NDEBUG
+static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
+#endif
+
+STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
+
+namespace llvm {
+ void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+ void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
+}
+
+namespace {
+ struct PPCCTRLoops : public FunctionPass {
+
+#ifndef NDEBUG
+ static int Counter;
+#endif
+
+ public:
+ static char ID;
+
+ PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
+ PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolution>();
+ }
+
+ private:
+ bool mightUseCTR(const Triple &TT, BasicBlock *BB);
+ bool convertToCTRLoop(Loop *L);
+
+ private:
+ PPCTargetMachine *TM;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ const TargetLibraryInfo *LibInfo;
+ };
+
+ char PPCCTRLoops::ID = 0;
+#ifndef NDEBUG
+ int PPCCTRLoops::Counter = 0;
+#endif
+
+#ifndef NDEBUG
+ struct PPCCTRLoopsVerify : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ private:
+ MachineDominatorTree *MDT;
+ };
+
+ char PPCCTRLoopsVerify::ID = 0;
+#endif // NDEBUG
+} // end anonymous namespace
+
+INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
+ false, false)
+
+FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
+ return new PPCCTRLoops(TM);
+}
+
+#ifndef NDEBUG
+INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
+
+FunctionPass *llvm::createPPCCTRLoopsVerify() {
+ return new PPCCTRLoopsVerify();
+}
+#endif // NDEBUG
+
+bool PPCCTRLoops::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
+
+ bool MadeChange = false;
+
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end();
+ I != E; ++I) {
+ Loop *L = *I;
+ if (!L->getParentLoop())
+ MadeChange |= convertToCTRLoop(L);
+ }
+
+ return MadeChange;
+}
+
+static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
+ if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
+ return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
+
+ return false;
+}
+
+bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+ J != JE; ++J) {
+ if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
+ }
+
+ continue;
+ }
+
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
+
+ if (Function *F = CI->getCalledFunction()) {
+ // Most intrinsics don't become function calls, but some might.
+ // sin, cos, exp and log are always calls.
+ unsigned Opcode;
+ if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (F->getIntrinsicID()) {
+ default: continue;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::longjmp:
+
+ // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+ // because, although it does clobber the counter register, the
+ // control can't then return to inside the loop unless there is also
+ // an eh_sjlj_setjmp.
+ case Intrinsic::eh_sjlj_setjmp:
+
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::powi:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
+ else
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
+ }
+ }
+
+ // PowerPC does not use [US]DIVREM or other library calls for
+ // operations on regular types which are not otherwise library calls
+ // (i.e. soft float or atomics). If adapting for targets that do,
+ // additional care is required here.
+
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ // Non-read-only functions are never treated as intrinsics.
+ if (!CI->onlyReadsMemory())
+ return true;
+
+ // Conversion happens only for FP calls.
+ if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+ return true;
+
+ switch (Func) {
+ default: return true;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::copysignl:
+ return true;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ continue; // ISD::FABS is never a library call.
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ Opcode = ISD::FSQRT; break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ Opcode = ISD::FFLOOR; break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ Opcode = ISD::FNEARBYINT; break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ Opcode = ISD::FCEIL; break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ Opcode = ISD::FRINT; break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ Opcode = ISD::FROUND; break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ Opcode = ISD::FTRUNC; break;
+ }
+
+ MVT VTy =
+ TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+ if (VTy == MVT::Other)
+ return true;
+
+ if (TLI->isOperationLegalOrCustom(Opcode, VTy))
+ continue;
+ else if (VTy.isVector() &&
+ TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
+ continue;
+
+ return true;
+ }
+ }
+
+ return true;
+ } else if (isa<BinaryOperator>(J) &&
+ J->getType()->getScalarType()->isPPC_FP128Ty()) {
+ // Most operations on ppc_f128 values become calls.
+ return true;
+ } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+ isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+ CastInst *CI = cast<CastInst>(J);
+ if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+ CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
+ isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
+ return true;
+ } else if (isLargeIntegerTy(TT.isArch32Bit(),
+ J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::UDiv ||
+ J->getOpcode() == Instruction::SDiv ||
+ J->getOpcode() == Instruction::URem ||
+ J->getOpcode() == Instruction::SRem)) {
+ return true;
+ } else if (TT.isArch32Bit() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
+ } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+ // On PowerPC, indirect jumps use the counter register.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
+
+ if (TLI->supportJumpTables() &&
+ SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
+ bool MadeChange = false;
+
+ Triple TT = Triple(L->getHeader()->getParent()->getParent()->
+ getTargetTriple());
+ if (!TT.isArch32Bit() && !TT.isArch64Bit())
+ return MadeChange; // Unknown arch. type.
+
+ // Process nested loops first.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ MadeChange |= convertToCTRLoop(*I);
+ }
+
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (MadeChange)
+ return MadeChange;
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = CTRLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= CTRLoopLimit)
+ return false;
+ Counter++;
+ }
+#endif
+
+ // We don't want to spill/restore the counter register, and so we don't
+ // want to use the counter register if the loop contains calls.
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I)
+ if (mightUseCTR(TT, *I))
+ return MadeChange;
+
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ BasicBlock *CountedExitBlock = nullptr;
+ const SCEV *ExitCount = nullptr;
+ BranchInst *CountedExitBranch = nullptr;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ IE = ExitingBlocks.end(); I != IE; ++I) {
+ const SCEV *EC = SE->getExitCount(L, *I);
+ DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
+ (*I)->getName() << ": " << *EC << "\n");
+ if (isa<SCEVCouldNotCompute>(EC))
+ continue;
+ if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+ if (ConstEC->getValue()->isZero())
+ continue;
+ } else if (!SE->isLoopInvariant(EC, L))
+ continue;
+
+ if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ continue;
+
+ // We now have a loop-invariant count of loop iterations (which is not the
+ // constant zero) for which we know that this loop will not exit via this
+ // exisiting block.
+
+ // We need to make sure that this block will run on every loop iteration.
+ // For this to be true, we must dominate all blocks with backedges. Such
+ // blocks are in-loop predecessors to the header block.
+ bool NotAlways = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
+ if (!L->contains(*PI))
+ continue;
+
+ if (!DT->dominates(*I, *PI)) {
+ NotAlways = true;
+ break;
+ }
+ }
+
+ if (NotAlways)
+ continue;
+
+ // Make sure this blocks ends with a conditional branch.
+ Instruction *TI = (*I)->getTerminator();
+ if (!TI)
+ continue;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional())
+ continue;
+
+ CountedExitBranch = BI;
+ } else
+ continue;
+
+ // Note that this block may not be the loop latch block, even if the loop
+ // has a latch block.
+ CountedExitBlock = *I;
+ ExitCount = EC;
+ break;
+ }
+
+ if (!CountedExitBlock)
+ return MadeChange;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+
+ // If we don't have a preheader, then insert one. If we already have a
+ // preheader, then we can use it (except if the preheader contains a use of
+ // the CTR register because some such uses might be reordered by the
+ // selection DAG after the mtctr instruction).
+ if (!Preheader || mightUseCTR(TT, Preheader))
+ Preheader = InsertPreheaderForLoop(L, this);
+ if (!Preheader)
+ return MadeChange;
+
+ DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
+
+ // Insert the count into the preheader and replace the condition used by the
+ // selected branch.
+ MadeChange = true;
+
+ SCEVExpander SCEVE(*SE, "loopcnt");
+ LLVMContext &C = SE->getContext();
+ Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
+ Type::getInt32Ty(C);
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
+ ExitCount = SE->getAddExpr(ExitCount,
+ SE->getConstant(CountType, 1));
+ Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
+ Preheader->getTerminator());
+
+ IRBuilder<> CountBuilder(Preheader->getTerminator());
+ Module *M = Preheader->getParent()->getParent();
+ Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
+ CountType);
+ CountBuilder.CreateCall(MTCTRFunc, ECValue);
+
+ IRBuilder<> CondBuilder(CountedExitBranch);
+ Value *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
+ Value *NewCond = CondBuilder.CreateCall(DecFunc);
+ Value *OldCond = CountedExitBranch->getCondition();
+ CountedExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(CountedExitBranch->getSuccessor(0)))
+ CountedExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ DeleteDeadPHIs(CountedExitBlock);
+
+ ++NumCTRLoops;
+ return MadeChange;
+}
+
+#ifndef NDEBUG
+static bool clobbersCTR(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
+ return true;
+ } else if (MO.isRegMask()) {
+ if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool verifyCTRBranch(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator BI = I;
+ SmallSet<MachineBasicBlock *, 16> Visited;
+ SmallVector<MachineBasicBlock *, 8> Preds;
+ bool CheckPreds;
+
+ if (I == MBB->begin()) {
+ Visited.insert(MBB);
+ goto queue_preds;
+ } else
+ --I;
+
+check_block:
+ Visited.insert(MBB);
+ if (I == MBB->end())
+ goto queue_preds;
+
+ CheckPreds = true;
+ for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
+ unsigned Opc = I->getOpcode();
+ if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
+ CheckPreds = false;
+ break;
+ }
+
+ if (I != BI && clobbersCTR(I)) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
+ MBB->getFullName() << ") instruction " << *I <<
+ " clobbers CTR, invalidating " << "BB#" <<
+ BI->getParent()->getNumber() << " (" <<
+ BI->getParent()->getFullName() << ") instruction " <<
+ *BI << "\n");
+ return false;
+ }
+
+ if (I == IE)
+ break;
+ }
+
+ if (!CheckPreds && Preds.empty())
+ return true;
+
+ if (CheckPreds) {
+queue_preds:
+ if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
+ DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
+ BI->getParent()->getNumber() << " (" <<
+ BI->getParent()->getFullName() << ") instruction " <<
+ *BI << "\n");
+ return false;
+ }
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PIE = MBB->pred_end(); PI != PIE; ++PI)
+ Preds.push_back(*PI);
+ }
+
+ do {
+ MBB = Preds.pop_back_val();
+ if (!Visited.count(MBB)) {
+ I = MBB->getLastNonDebugInstr();
+ goto check_block;
+ }
+ } while (!Preds.empty());
+
+ return true;
+}
+
+bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
+ MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
+ // any other instructions that might clobber the ctr register.
+ for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
+ I != IE; ++I) {
+ MachineBasicBlock *MBB = I;
+ if (!MDT->isReachableFromEntry(MBB))
+ continue;
+
+ for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
+ MIIE = MBB->end(); MII != MIIE; ++MII) {
+ unsigned Opc = MII->getOpcode();
+ if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
+ Opc == PPC::BDZ8 || Opc == PPC::BDZ)
+ if (!verifyCTRBranch(MBB, MII))
+ llvm_unreachable("Invalid PPC CTR loop!");
+ }
+ }
+
+ return false;
+}
+#endif // NDEBUG
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
new file mode 100644
index 000000000000..222760a0cb91
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -0,0 +1,199 @@
+//===- PPCCallingConv.td - Calling Conventions for PowerPC -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PowerPC 32- and 64-bit
+// architectures.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+class CCIfNotSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("!State.getTarget().getSubtarget<PPCSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for PowerPC
+def RetCC_PPC : CallingConv<[
+ // On PPC64, integer return values are always promoted to i64
+ CCIfType<[i32, i1], CCIfSubtarget<"isPPC64()", CCPromoteToType<i64>>>,
+ CCIfType<[i1], CCIfNotSubtarget<"isPPC64()", CCPromoteToType<i32>>>,
+
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+
+ // Floating point types returned as "direct" go into F1 .. F8; note that
+ // only the ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Vector types returned as "direct" go into V2 .. V9; note that only the
+ // ELFv2 ABI fully utilizes all these registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+]>;
+
+
+// Note that we don't currently have calling conventions for 64-bit
+// PowerPC, but handle all the complexities of the ABI in the lowering
+// logic. FIXME: See if the logic can be simplified with use of CCs.
+// This may require some extensions to current table generation.
+
+// Simple calling convention for 64-bit ELF PowerPC fast isel.
+// Only handle ints and floats. All ints are promoted to i64.
+// Vector types and quadword ints are not handled.
+def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i64>>,
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>
+]>;
+
+// Simple return-value convention for 64-bit ELF PowerPC fast isel.
+// All small ints are promoted to i64. Vector types, quadword ints,
+// and multiple register returns are "supported" to avoid compile
+// errors, but none are handled by the fast selector.
+def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i64>>,
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC System V Release 4 32-bit ABI
+//===----------------------------------------------------------------------===//
+
+def CC_PPC32_SVR4_Common : CallingConv<[
+ CCIfType<[i1], CCPromoteToType<i32>>,
+
+ // The ABI requires i64 to be passed in two adjacent registers with the first
+ // register having an odd register number.
+ CCIfType<[i32], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>,
+
+ // The first 8 integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>,
+
+ // Make sure the i64 words from a long double are either both passed in
+ // registers or both passed on the stack.
+ CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>,
+
+ // FP values are passed in F1 - F8.
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
+
+ // Split arguments have an alignment of 8 bytes on the stack.
+ CCIfType<[i32], CCIfSplit<CCAssignToStack<4, 8>>>,
+
+ CCIfType<[i32], CCAssignToStack<4, 4>>,
+
+ // Floats are stored in double precision format, thus they have the same
+ // alignment and size as doubles.
+ CCIfType<[f32,f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32, v2f64, v2i64], CCAssignToStack<16, 16>>
+]>;
+
+// This calling convention puts vector arguments always on the stack. It is used
+// to assign vector arguments which belong to the variable portion of the
+// parameter list of a variable argument function.
+def CC_PPC32_SVR4_VarArg : CallingConv<[
+ CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// In contrast to CC_PPC32_SVR4_VarArg, this calling convention first tries to
+// put vector arguments in vector registers before putting them on the stack.
+def CC_PPC32_SVR4 : CallingConv<[
+ // The first 12 Vector arguments are passed in AltiVec registers.
+ CCIfType<[v16i8, v8i16, v4i32, v4f32],
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10, V11, V12, V13]>>,
+ CCIfType<[v2f64, v2i64],
+ CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
+ VSH10, VSH11, VSH12, VSH13]>>,
+
+ CCDelegateTo<CC_PPC32_SVR4_Common>
+]>;
+
+// Helper "calling convention" to handle aggregate by value arguments.
+// Aggregate by value arguments are always placed in the local variable space
+// of the caller. This calling convention is only used to assign those stack
+// offsets in the callers stack frame.
+//
+// Still, the address of the aggregate copy in the callers stack frame is passed
+// in a GPR (or in the parameter list area if all GPRs are allocated) from the
+// caller to the callee. The location for the address argument is assigned by
+// the CC_PPC32_SVR4 calling convention.
+//
+// The only purpose of CC_PPC32_SVR4_Custom_Dummy is to skip arguments which are
+// not passed by value.
+
+def CC_PPC32_SVR4_ByVal : CallingConv<[
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
+]>;
+
+def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
+def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>;
+
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28,
+ R29, R30, R31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
+
+def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_Darwin64_Altivec : CalleeSavedRegs<(add CSR_Darwin64, CSR_Altivec)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
+ X21, X22, X23, X24, X25, X26, X27, X28,
+ X29, X30, X31, F14, F15, F16, F17, F18,
+ F19, F20, F21, F22, F23, F24, F25, F26,
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+
+def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
+
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
new file mode 100644
index 000000000000..08755238f925
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -0,0 +1,293 @@
+//===-- PPCCodeEmitter.cpp - JIT Code Emitter for PowerPC -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit CodeEmitter and associated machinery to
+// JIT-compile bitcode to native PowerPC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCRelocations.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+namespace {
+ class PPCCodeEmitter : public MachineFunctionPass {
+ TargetMachine &TM;
+ JITCodeEmitter &MCE;
+ MachineModuleInfo *MMI;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ static char ID;
+
+ /// MovePCtoLROffset - When/if we see a MovePCtoLR instruction, we record
+ /// its address in the function into this pointer.
+ void *MovePCtoLROffset;
+ public:
+
+ PPCCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+ : MachineFunctionPass(ID), TM(tm), MCE(mce) {}
+
+ /// getBinaryCodeForInstr - This function, generated by the
+ /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+ /// machine instructions.
+ uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+
+ MachineRelocation GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const;
+
+ /// getMachineOpValue - evaluates the MachineOperand of a given MachineInstr
+ unsigned getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const;
+
+ unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getAbsDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const;
+ unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
+
+ const char *getPassName() const override {
+ return "PowerPC Machine Code Emitter";
+ }
+
+ /// runOnMachineFunction - emits the given MachineFunction to memory
+ ///
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ /// emitBasicBlock - emits the given MachineBasicBlock to memory
+ ///
+ void emitBasicBlock(MachineBasicBlock &MBB);
+ };
+}
+
+char PPCCodeEmitter::ID = 0;
+
+/// createPPCCodeEmitterPass - Return a pass that emits the collected PPC code
+/// to the specified MCE object.
+FunctionPass *llvm::createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
+ JITCodeEmitter &JCE) {
+ return new PPCCodeEmitter(TM, JCE);
+}
+
+bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+ assert((MF.getTarget().getRelocationModel() != Reloc::Default ||
+ MF.getTarget().getRelocationModel() != Reloc::Static) &&
+ "JIT relocation model must be set to static or default!");
+
+ MMI = &getAnalysis<MachineModuleInfo>();
+ MCE.setModuleInfo(MMI);
+ do {
+ MovePCtoLROffset = nullptr;
+ MCE.startFunction(MF);
+ for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
+ emitBasicBlock(*BB);
+ } while (MCE.finishFunction(MF));
+
+ return false;
+}
+
+void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
+ MCE.StartMachineBasicBlock(&MBB);
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I){
+ const MachineInstr &MI = *I;
+ MCE.processDebugLoc(MI.getDebugLoc(), true);
+ switch (MI.getOpcode()) {
+ default:
+ MCE.emitWordBE(getBinaryCodeForInstr(MI));
+ break;
+ case TargetOpcode::CFI_INSTRUCTION:
+ break;
+ case TargetOpcode::EH_LABEL:
+ MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+ break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ break; // pseudo opcode, no side effects
+ case PPC::MovePCtoLR:
+ case PPC::MovePCtoLR8:
+ assert(TM.getRelocationModel() == Reloc::PIC_);
+ MovePCtoLROffset = (void*)MCE.getCurrentPCValue();
+ MCE.emitWordBE(0x48000005); // bl 1
+ break;
+ }
+ MCE.processDebugLoc(MI.getDebugLoc(), false);
+ }
+}
+
+unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
+ (MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
+ return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
+}
+
+MachineRelocation PPCCodeEmitter::GetRelocation(const MachineOperand &MO,
+ unsigned RelocID) const {
+ // If in PIC mode, we need to encode the negated address of the
+ // 'movepctolr' into the unrelocated field. After relocation, we'll have
+ // &gv-&movepctolr-4 in the imm field. Once &movepctolr is added to the imm
+ // field, we get &gv. This doesn't happen for branch relocations, which are
+ // always implicitly pc relative.
+ intptr_t Cst = 0;
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ assert(MovePCtoLROffset && "MovePCtoLR not seen yet?");
+ Cst = -(intptr_t)MovePCtoLROffset - 4;
+ }
+
+ if (MO.isGlobal())
+ return MachineRelocation::getGV(MCE.getCurrentPCOffset(), RelocID,
+ const_cast<GlobalValue *>(MO.getGlobal()),
+ Cst, isa<Function>(MO.getGlobal()));
+ if (MO.isSymbol())
+ return MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+ RelocID, MO.getSymbolName(), Cst);
+ if (MO.isCPI())
+ return MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+
+ if (MO.isMBB())
+ return MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+ RelocID, MO.getMBB());
+
+ assert(MO.isJTI());
+ return MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+ RelocID, MO.getIndex(), Cst);
+}
+
+unsigned PPCCodeEmitter::getDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_pcrel_bcx));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
+}
+
+unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
+}
+
+unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
+
+ unsigned RelocID;
+ switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) {
+ default: llvm_unreachable("Unsupported target operand flags!");
+ case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break;
+ case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break;
+ }
+
+ MCE.addRelocation(GetRelocation(MO, RelocID));
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getMemRIEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memri, which has the low 16-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 16;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return (getMachineOpValue(MI, MO) & 0xFFFF) | RegBits;
+
+ // Add a fixup for the displacement field.
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+ return RegBits;
+}
+
+unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ // Encode (imm, reg) as a memrix, which has the low 14-bits as the
+ // displacement and the next 5 bits as the register #.
+ assert(MI.getOperand(OpNo+1).isReg());
+ unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1)) << 14;
+
+ const MachineOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm())
+ return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits;
+
+ MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
+ return RegBits;
+}
+
+
+unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
+
+unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+ const MachineOperand &MO) const {
+
+ if (MO.isReg()) {
+ // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // The GPR operand should come through here though.
+ assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
+ MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
+ return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
+ }
+
+ assert(MO.isImm() &&
+ "Relocation required in an instruction that we cannot encode!");
+ return MO.getImm();
+}
+
+#include "PPCGenCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
new file mode 100644
index 000000000000..2e524d604789
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -0,0 +1,2282 @@
+//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// PPCGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCISelLowering.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+//===----------------------------------------------------------------------===//
+//
+// TBD:
+// FastLowerArguments: Handle simple cases.
+// PPCMaterializeGV: Handle TLS.
+// SelectCall: Handle function pointers.
+// SelectCall: Handle multi-register return values.
+// SelectCall: Optimize away nops for local calls.
+// processCallArgs: Handle bit-converted arguments.
+// finishCall: Handle multi-register return values.
+// PPCComputeAddress: Handle parameter references as FrameIndex's.
+// PPCEmitCmp: Handle immediate as operand 1.
+// SelectCall: Handle small byval arguments.
+// SelectIntrinsicCall: Implement.
+// SelectSelect: Implement.
+// Consider factoring isTypeLegal into the base class.
+// Implement switches and jump tables.
+//
+//===----------------------------------------------------------------------===//
+using namespace llvm;
+
+#define DEBUG_TYPE "ppcfastisel"
+
+namespace {
+
+typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ long Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+} Address;
+
+class PPCFastISel final : public FastISel {
+
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ const PPCSubtarget *PPCSubTarget;
+ LLVMContext *Context;
+
+ public:
+ explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo)
+ : FastISel(FuncInfo, LibInfo),
+ TM(FuncInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()),
+ PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
+ Context(&FuncInfo.Fn->getContext()) { }
+
+ // Backend specific FastISel code.
+ private:
+ bool TargetSelectInstruction(const Instruction *I) override;
+ unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) override;
+ bool FastLowerArguments() override;
+ unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+ unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectIToFP(const Instruction *I, bool IsSigned);
+ bool SelectFPToI(const Instruction *I, bool IsSigned);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectCall(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt, unsigned DestReg);
+ bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC, bool IsZExt = true,
+ unsigned FP64LoadOpc = PPC::LFD);
+ bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
+ bool PPCComputeAddress(const Value *Obj, Address &Addr);
+ void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg);
+ bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt);
+ unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+ unsigned PPCMaterialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC);
+ unsigned PPCMaterialize64BitInt(int64_t Imm,
+ const TargetRegisterClass *RC);
+ unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned);
+ unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
+
+ // Call handling routines.
+ private:
+ bool processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg);
+ void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg);
+ CCAssignFn *usePPC32CCs(unsigned Flag);
+
+ private:
+ #include "PPCGenFastISel.inc"
+
+};
+
+} // end anonymous namespace
+
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
+ if (Flag == 1)
+ return CC_PPC32_SVR4;
+ else if (Flag == 2)
+ return CC_PPC32_SVR4_ByVal;
+ else if (Flag == 3)
+ return CC_PPC32_SVR4_VarArg;
+ else
+ return RetCC_PPC;
+}
+
+static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // These are not representable with any single compare.
+ case CmpInst::FCMP_FALSE:
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ default:
+ return Optional<PPC::Predicate>();
+
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::ICMP_EQ:
+ return PPC::PRED_EQ;
+
+ case CmpInst::FCMP_OGT:
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ return PPC::PRED_GT;
+
+ case CmpInst::FCMP_OGE:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ return PPC::PRED_GE;
+
+ case CmpInst::FCMP_OLT:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_SLT:
+ return PPC::PRED_LT;
+
+ case CmpInst::FCMP_OLE:
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ return PPC::PRED_LE;
+
+ case CmpInst::FCMP_ONE:
+ case CmpInst::ICMP_NE:
+ return PPC::PRED_NE;
+
+ case CmpInst::FCMP_ORD:
+ return PPC::PRED_NU;
+
+ case CmpInst::FCMP_UNO:
+ return PPC::PRED_UN;
+ }
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel, and return its equivalent machine type in VT.
+// FIXME: Copied directly from ARM -- factor into base class?
+bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT Evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (Evt == MVT::Other || !Evt.isSimple()) return false;
+ VT = Evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel as a load target, and return its equivalent machine type in VT.
+bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type than can be sign or zero-extended to a basic operation
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
+ return true;
+ }
+
+ return false;
+}
+
+// Given a value Obj, create an Address object Addr that represents its
+// address. Return false if we can't handle it.
+bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
+ const User *U = nullptr;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ // Look through bitcasts.
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ long TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
+ II != IE; ++II, ++GTI) {
+ const Value *Op = *II;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // FIXME: References to parameters fall through to the behavior
+ // below. They should be able to reference a frame index since
+ // they are stored to the stack, so we can get "ld rx, offset(r1)"
+ // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
+ // just contain the parameter. Try to handle this with a FI.
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0)
+ Addr.Base.Reg = getRegForValue(Obj);
+
+ // Prevent assignment of base register to X0, which is inappropriate
+ // for loads and stores alike.
+ if (Addr.Base.Reg != 0)
+ MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ return Addr.Base.Reg != 0;
+}
+
+// Fix up some addresses that can't be used directly. For example, if
+// an offset won't fit in an instruction field, we may need to move it
+// into an index register.
+void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg) {
+
+ // Check whether the offset fits in the instruction field.
+ if (!isInt<16>(Addr.Offset))
+ UseOffset = false;
+
+ // If this is a stack pointer and the offset needs to be simplified then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ if (!UseOffset) {
+ IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
+ : Type::getInt64Ty(*Context));
+ const ConstantInt *Offset =
+ ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
+ IndexReg = PPCMaterializeInt(Offset, MVT::i64);
+ assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
+ }
+}
+
+// Emit a load instruction if possible, returning true if we succeeded,
+// otherwise false. See commentary below for how the register class of
+// the load is determined.
+bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC,
+ bool IsZExt, unsigned FP64LoadOpc) {
+ unsigned Opc;
+ bool UseOffset = true;
+
+ // If ResultReg is given, it determines the register class of the load.
+ // Otherwise, RC is the register class to use. If the result of the
+ // load isn't anticipated in this block, both may be zero, in which
+ // case we must make a conservative guess. In particular, don't assign
+ // R0 or X0 to the result register, as the result may be used in a load,
+ // store, add-immediate, or isel that won't permit this. (Though
+ // perhaps the spill and reload of live-exit values would handle this?)
+ const TargetRegisterClass *UseRC =
+ (ResultReg ? MRI.getRegClass(ResultReg) :
+ (RC ? RC :
+ (VT == MVT::f64 ? &PPC::F8RCRegClass :
+ (VT == MVT::f32 ? &PPC::F4RCRegClass :
+ (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass)))));
+
+ bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
+ break;
+ case MVT::i16:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
+ (Is32BitInt ? PPC::LHA : PPC::LHA8));
+ break;
+ case MVT::i32:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
+ (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
+ if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
+ UseOffset = false;
+ break;
+ case MVT::i64:
+ Opc = PPC::LD;
+ assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
+ "64-bit load with 32-bit target??");
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::LFS;
+ break;
+ case MVT::f64:
+ Opc = FP64LoadOpc;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+ if (ResultReg == 0)
+ ResultReg = createResultReg(UseRC);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset) {
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ } else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::LBZ: Opc = PPC::LBZX; break;
+ case PPC::LBZ8: Opc = PPC::LBZX8; break;
+ case PPC::LHZ: Opc = PPC::LHZX; break;
+ case PPC::LHZ8: Opc = PPC::LHZX8; break;
+ case PPC::LHA: Opc = PPC::LHAX; break;
+ case PPC::LHA8: Opc = PPC::LHAX8; break;
+ case PPC::LWZ: Opc = PPC::LWZX; break;
+ case PPC::LWZ8: Opc = PPC::LWZX8; break;
+ case PPC::LWA: Opc = PPC::LWAX; break;
+ case PPC::LWA_32: Opc = PPC::LWAX_32; break;
+ case PPC::LD: Opc = PPC::LDX; break;
+ case PPC::LFS: Opc = PPC::LFSX; break;
+ case PPC::LFD: Opc = PPC::LFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a load instruction.
+bool PPCFastISel::SelectLoad(const Instruction *I) {
+ // FIXME: No atomic loads are supported.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(0), Addr))
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. This is necessary
+ // to constrain RA from using R0/X0 when this is not legal.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
+ return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Emit a store instruction to store SrcReg at Addr.
+bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
+ assert(SrcReg && "Nothing to store!");
+ unsigned Opc;
+ bool UseOffset = true;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
+ bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::STB : PPC::STB8;
+ break;
+ case MVT::i16:
+ Opc = Is32BitInt ? PPC::STH : PPC::STH8;
+ break;
+ case MVT::i32:
+ assert(Is32BitInt && "Not GPRC for i32??");
+ Opc = PPC::STW;
+ break;
+ case MVT::i64:
+ Opc = PPC::STD;
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::STFS;
+ break;
+ case MVT::f64:
+ Opc = PPC::STFD;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg)
+ .addImm(Addr.Offset)
+ .addFrameIndex(Addr.Base.FI)
+ .addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::STB: Opc = PPC::STBX; break;
+ case PPC::STH : Opc = PPC::STHX; break;
+ case PPC::STW : Opc = PPC::STWX; break;
+ case PPC::STB8: Opc = PPC::STBX8; break;
+ case PPC::STH8: Opc = PPC::STHX8; break;
+ case PPC::STW8: Opc = PPC::STWX8; break;
+ case PPC::STD: Opc = PPC::STDX; break;
+ case PPC::STFS: Opc = PPC::STFSX; break;
+ case PPC::STFD: Opc = PPC::STFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
+ .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a store instruction.
+bool PPCFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // FIXME: No atomics loads are supported.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(Op0->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!PPCEmitStore(VT, SrcReg, Addr))
+ return false;
+
+ return true;
+}
+
+// Attempt to fast-select a branch instruction.
+bool PPCFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *BrBB = FuncInfo.MBB;
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // For now, just try the simplest case where it's fed by a compare.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+ if (!OptPPCPred)
+ return false;
+
+ PPC::Predicate PPCPred = OptPPCPred.getValue();
+
+ // Take advantage of fall-through opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ PPCPred = PPC::InvertPredicate(PPCPred);
+ }
+
+ unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+
+ if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CondReg))
+ return false;
+
+ BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
+ .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+ FastEmitBranch(FBB, DbgLoc);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DbgLoc);
+ return true;
+ }
+
+ // FIXME: ARM looks for a case where the block containing the compare
+ // has been split from the block containing the branch. If this happens,
+ // there is a vreg available containing the result of the compare. I'm
+ // not sure we can do much, as we've lost the predicate information with
+ // the compare instruction -- we have a 4-bit CR but don't know which bit
+ // to test here.
+ return false;
+}
+
+// Attempt to emit a compare of the two source values. Signed and unsigned
+// comparisons are supported. Return false if we can't handle it.
+bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
+ bool IsZExt, unsigned DestReg) {
+ Type *Ty = SrcValue1->getType();
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
+ return false;
+
+ // See if operand 2 is an immediate encodeable in the compare.
+ // FIXME: Operands are not in canonical order at -O0, so an immediate
+ // operand in position 1 is a lost opportunity for now. We are
+ // similar to ARM in this regard.
+ long Imm = 0;
+ bool UseImm = false;
+
+ // Only 16-bit integer constants can be represented in compares for
+ // PowerPC. Others will be materialized into a register.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
+ if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
+ if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
+ UseImm = true;
+ }
+ }
+
+ unsigned CmpOpc;
+ bool NeedsExt = false;
+ switch (SrcVT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ CmpOpc = PPC::FCMPUS;
+ break;
+ case MVT::f64:
+ CmpOpc = PPC::FCMPUD;
+ break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ NeedsExt = true;
+ // Intentional fall-through.
+ case MVT::i32:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
+ break;
+ case MVT::i64:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
+ if (NeedsExt) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg1 = ExtReg;
+
+ if (!UseImm) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg2 = ExtReg;
+ }
+ }
+
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addImm(Imm);
+
+ return true;
+}
+
+// Attempt to fast-select a floating-point extend instruction.
+bool PPCFastISel::SelectFPExt(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f32 || DestVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // No code is generated for a FP extend.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point truncate instruction.
+bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f64 || DestVT != MVT::f32)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // Round the result to single precision.
+ unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP), DestReg)
+ .addReg(SrcReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
+// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
+// case to 8 bytes which produces tighter code but wastes stack space.
+unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
+ bool IsSigned) {
+
+ // If necessary, extend 32-bit int to 64-bit.
+ if (SrcVT == MVT::i32) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return 0;
+ SrcReg = TmpReg;
+ }
+
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the GPR.
+ if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
+ return 0;
+
+ // Load the integer value into an FPR. The kind of load used depends
+ // on a number of conditions.
+ unsigned LoadOpc = PPC::LFD;
+
+ if (SrcVT == MVT::i32) {
+ if (!IsSigned) {
+ LoadOpc = PPC::LFIWZX;
+ Addr.Offset = 4;
+ } else if (PPCSubTarget->hasLFIWAX()) {
+ LoadOpc = PPC::LFIWAX;
+ Addr.Offset = 4;
+ }
+ }
+
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select an integer-to-floating-point conversion.
+bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
+ MVT DstVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::f32 && DstVT != MVT::f64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
+ SrcVT != MVT::i32 && SrcVT != MVT::i64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // We can only lower an unsigned convert if we have the newer
+ // floating-point conversion operations.
+ if (!IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
+ // FIXME: For now we require the newer floating-point conversion operations
+ // (which are present only on P7 and A2 server models) when converting
+ // to single-precision float. Otherwise we have to generate a lot of
+ // fiddly code to avoid double rounding. If necessary, the fiddly code
+ // can be found in PPCTargetLowering::LowerINT_TO_FP().
+ if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
+ return false;
+
+ // Extend the input if necessary.
+ if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return false;
+ SrcVT = MVT::i64;
+ SrcReg = TmpReg;
+ }
+
+ // Move the integer value to an FPR.
+ unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
+ if (FPReg == 0)
+ return false;
+
+ // Determine the opcode for the conversion.
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned Opc;
+
+ if (DstVT == MVT::f32)
+ Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
+ else
+ Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(FPReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move the floating-point value in SrcReg into an integer destination
+// register, and return the register (or zero if we can't handle it).
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned) {
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ // Note that if have STFIWX available, we could use a 4-byte stack
+ // slot for i32, but this being fast-isel we'll just go with the
+ // easiest code gen possible.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the FPR.
+ if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
+ return 0;
+
+ // Reload it into a GPR. If we want an i32, modify the address
+ // to have a 4-byte offset so we load from the right place.
+ if (VT == MVT::i32)
+ Addr.Offset = 4;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+ MVT DstVT, SrcVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::i32 && DstVT != MVT::i64)
+ return false;
+
+ // If we don't have FCTIDUZ and we need it, punt to SelectionDAG.
+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT())
+ return false;
+
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+ if (!isTypeLegal(SrcTy, SrcVT))
+ return false;
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // Convert f32 to f64 if necessary. This is just a meaningless copy
+ // to get the register class right. COPY_TO_REGCLASS is needed since
+ // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
+ const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+ if (InRC == &PPC::F4RCRegClass) {
+ unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+ .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+ SrcReg = TmpReg;
+ }
+
+ // Determine the opcode for the conversion, which takes place
+ // entirely within FPRs.
+ unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+ unsigned Opc;
+
+ if (DstVT == MVT::i32)
+ if (IsSigned)
+ Opc = PPC::FCTIWZ;
+ else
+ Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ else
+ Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Now move the integer value from a float register to an integer register.
+ unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+ if (IntReg == 0)
+ return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+// Attempt to fast-select a binary integer operation that isn't already
+// handled automatically.
+bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. If there is no register,
+ // make a conservative choice (don't assign R0).
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ &PPC::GPRC_and_GPRC_NOR0RegClass);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
+ break;
+ case ISD::OR:
+ Opc = IsGPRC ? PPC::OR : PPC::OR8;
+ break;
+ case ISD::SUB:
+ Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
+ break;
+ }
+
+ unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // Handle case of small immediate operand.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ const APInt &CIVal = ConstInt->getValue();
+ int Imm = (int)CIVal.getSExtValue();
+ bool UseImm = true;
+ if (isInt<16>(Imm)) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Missing case!");
+ case PPC::ADD4:
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ break;
+ case PPC::ADD8:
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ break;
+ case PPC::OR:
+ Opc = PPC::ORI;
+ break;
+ case PPC::OR8:
+ Opc = PPC::ORI8;
+ break;
+ case PPC::SUBF:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ case PPC::SUBF8:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ }
+
+ if (UseImm) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg)
+ .addReg(SrcReg1)
+ .addImm(Imm);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ // Reg-reg case.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ // Reverse operands for subtract-from.
+ if (ISDOpcode == ISD::SUB)
+ std::swap(SrcReg1, SrcReg2);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Handle arguments to a call that we're attempting to fast-select.
+// Return false if the arguments are too complex for us at the moment.
+bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+
+ // Reserve space for the linkage area on the stack.
+ bool isELFv2ABI = PPCSubTarget->isELFv2ABI();
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
+ CCInfo.AllocateStack(LinkageSize, 8);
+
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
+
+ // Bail out if we can't handle any of the arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Skip vector arguments for now, as well as long double and
+ // uint128_t, and anything that isn't passed in a register.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
+ !VA.isRegLoc() || VA.needsCustom())
+ return false;
+
+ // Skip bit-converted arguments for now.
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ return false;
+ }
+
+ // Get a count of how many bytes are to be pushed onto the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if its varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
+ NumBytes = std::max(NumBytes, LinkageSize + 64);
+
+ // Issue CALLSEQ_START.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TII.getCallFrameSetupOpcode()))
+ .addImm(NumBytes);
+
+ // Prepare to assign register arguments. Every argument uses up a
+ // GPR protocol register even if it's passed in a floating-point
+ // register.
+ unsigned NextGPR = PPC::X3;
+ unsigned NextFPR = PPC::F1;
+
+ // Process arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Handle argument promotion and bitcasts.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
+ llvm_unreachable("Failed to emit a sext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
+ llvm_unreachable("Failed to emit a zext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ // FIXME: Not yet handled.
+ llvm_unreachable("Should have bailed before getting here!");
+ break;
+ }
+ }
+
+ // Copy this argument to the appropriate register.
+ unsigned ArgReg;
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
+ ArgReg = NextFPR++;
+ ++NextGPR;
+ } else
+ ArgReg = NextGPR++;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
+ RegArgs.push_back(ArgReg);
+ }
+
+ return true;
+}
+
+// For a call that we've determined we can fast-select, finish the
+// call sequence and generate a copy to obtain the return value (if any).
+void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg) {
+ // Issue CallSEQ_END.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TII.getCallFrameDestroyOpcode()))
+ .addImm(NumBytes).addImm(0);
+
+ // Next, generate a copy to obtain the return value.
+ // FIXME: No multi-register return values yet, though I don't foresee
+ // any real difficulties there.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ CCValAssign &VA = RVLocs[0];
+ assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ MVT DestVT = VA.getValVT();
+ MVT CopyVT = DestVT;
+
+ // Ints smaller than a register still arrive in a full 64-bit
+ // register, so make sure we recognize this.
+ if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
+ CopyVT = MVT::i64;
+
+ unsigned SourcePhysReg = VA.getLocReg();
+ unsigned ResultReg = 0;
+
+ if (RetVT == CopyVT) {
+ const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
+ ResultReg = createResultReg(CpyRC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+
+ // If necessary, round the floating result to single precision.
+ } else if (CopyVT == MVT::f64) {
+ ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
+ ResultReg).addReg(SourcePhysReg);
+
+ // If only the low half of a general register is needed, generate
+ // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
+ // used along the fast-isel path (not lowered), and downstream logic
+ // also doesn't like a direct subreg copy on a physical reg.)
+ } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
+ ResultReg = createResultReg(&PPC::GPRCRegClass);
+ // Convert physical register from G8RC to GPRC.
+ SourcePhysReg -= PPC::X0 - PPC::R0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+ }
+
+ assert(ResultReg && "ResultReg unset!");
+ UsedRegs.push_back(SourcePhysReg);
+ UpdateValueMap(I, ResultReg);
+ }
+}
+
+// Attempt to fast-select a call instruction.
+bool PPCFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall())
+ return false;
+
+ // Obtain calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool IsVarArg = FTy->isVarArg();
+
+ // Not ready for varargs yet.
+ if (IsVarArg)
+ return false;
+
+ // Handle simple calls for now, with legal return types and
+ // those that can be extended.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8)
+ return false;
+
+ // FIXME: No multi-register return values yet.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
+ RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
+ RetVT != MVT::f64) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ if (RVLocs.size() > 1)
+ return false;
+ }
+
+ // Bail early if more than 8 arguments, as we only currently
+ // handle arguments passed in registers.
+ unsigned NumArgs = CS.arg_size();
+ if (NumArgs > 8)
+ return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+
+ Args.reserve(NumArgs);
+ ArgRegs.reserve(NumArgs);
+ ArgVTs.reserve(NumArgs);
+ ArgFlags.reserve(NumArgs);
+
+ for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
+ II != IE; ++II) {
+ // FIXME: ARM does something for intrinsic calls here, check into that.
+
+ unsigned AttrIdx = II - CS.arg_begin() + 1;
+
+ // Only handle easy calls for now. It would be reasonably easy
+ // to handle <= 8-byte structures passed ByVal in registers, but we
+ // have to ensure they are right-justified in the register.
+ if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
+ CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
+ CS.paramHasAttr(AttrIdx, Attribute::ByVal))
+ return false;
+
+ ISD::ArgFlagsTy Flags;
+ if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
+ Flags.setZExt();
+
+ Type *ArgTy = (*II)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
+ return false;
+
+ if (ArgVT.isVector())
+ return false;
+
+ unsigned Arg = getRegForValue(*II);
+ if (Arg == 0)
+ return false;
+
+ unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*II);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Process the arguments.
+ SmallVector<unsigned, 8> RegArgs;
+ unsigned NumBytes;
+
+ if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, IsVarArg))
+ return false;
+
+ // FIXME: No handling for function pointers yet. This requires
+ // implementing the function descriptor (OPD) setup.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV)
+ return false;
+
+ // Build direct call with NOP for TOC restore.
+ // FIXME: We can and should optimize away the NOP for local calls.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::BL8_NOP));
+ // Add callee.
+ MIB.addGlobalAddress(GV);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
+ MIB.addReg(RegArgs[II], RegState::Implicit);
+
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (PPCSubTarget->isELFv2ABI())
+ MIB.addReg(PPC::X2, RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers. Proper
+ // defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
+
+ // Set all unused physregs defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+// Attempt to fast-select a return instruction.
+bool PPCFastISel::SelectRet(const Instruction *I) {
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+ CallingConv::ID CC = F.getCallingConv();
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
+ const Value *RV = Ret->getOperand(0);
+
+ // FIXME: Only one output register for now.
+ if (ValLocs.size() > 1)
+ return false;
+
+ // Special case for returning a constant integer of any size.
+ // Materialize the constant as an i64 and copy it to the return
+ // register. This avoids an unnecessary extend or truncate.
+ if (isa<ConstantInt>(*RV)) {
+ const Constant *C = cast<Constant>(RV);
+ unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
+ unsigned RetReg = ValLocs[0].getLocReg();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);
+ RetRegs.push_back(RetReg);
+
+ } else {
+ unsigned Reg = getRegForValue(RV);
+
+ if (Reg == 0)
+ return false;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i < ValLocs.size(); ++i) {
+
+ CCValAssign &VA = ValLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ RetRegs.push_back(VA.getLocReg());
+ unsigned SrcReg = Reg + VA.getValNo();
+
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple())
+ return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getLocVT();
+
+ if (RVVT != DestVT && RVVT != MVT::i8 &&
+ RVVT != MVT::i16 && RVVT != MVT::i32)
+ return false;
+
+ if (RVVT != DestVT) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ llvm_unreachable("Full value assign but types don't match?");
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ case CCValAssign::SExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ }
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), RetRegs[i])
+ .addReg(SrcReg);
+ }
+ }
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::BLR));
+
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+
+ return true;
+}
+
+// Attempt to emit an integer extend of SrcReg into DestReg. Both
+// signed and zero extensions are supported. Return false if we
+// can't handle it.
+bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i64)
+ return false;
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
+ return false;
+
+ // Signed extensions use EXTSB, EXTSH, EXTSW.
+ if (!IsZExt) {
+ unsigned Opc;
+ if (SrcVT == MVT::i8)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
+ else if (SrcVT == MVT::i16)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
+ else {
+ assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
+ Opc = PPC::EXTSW_32_64;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Unsigned 32-bit extensions use RLWINM.
+ } else if (DestVT == MVT::i32) {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 24;
+ else {
+ assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
+ MB = 16;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
+ DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
+
+ // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
+ } else {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 56;
+ else if (SrcVT == MVT::i16)
+ MB = 48;
+ else
+ MB = 32;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(PPC::RLDICL_32_64), DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select an indirect branch instruction.
+bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0)
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
+ .addReg(AddrReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
+ return true;
+}
+
+// Attempt to fast-select an integer truncate instruction.
+bool PPCFastISel::SelectTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
+ return false;
+
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // The only interesting case is when we need to switch register classes.
+ if (SrcVT == MVT::i64) {
+ unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(SrcReg, 0, PPC::sub_32);
+ SrcReg = ResultReg;
+ }
+
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select an integer extend instruction.
+bool PPCFastISel::SelectIntExt(const Instruction *I) {
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool IsZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ if (!DestEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+
+ // If we know the register class needed for the result of this
+ // instruction, use it. Otherwise pick the register class of the
+ // correct size that does not contain X0/R0, since we don't know
+ // whether downstream uses permit that assignment.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass));
+ unsigned ResultReg = createResultReg(RC);
+
+ if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Attempt to fast-select an instruction that wasn't handled by
+// the table-generated machinery.
+bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*IsSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*IsSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*IsSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*IsSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::Call:
+ if (dyn_cast<IntrinsicInst>(I))
+ return false;
+ return SelectCall(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
+ // Here add other flavors of Instruction::XXX that automated
+ // cases don't catch. For example, switches are terminators
+ // that aren't yet handled.
+ default:
+ break;
+ }
+ return false;
+}
+
+// Materialize a floating-point constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
+ // No plans to handle long double here.
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return 0;
+
+ // All FP constants are loaded from the constant pool.
+ unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
+ assert(Align > 0 && "Unexpectedly missing alignment information!");
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+ (VT == MVT::f32) ? 4 : 8, Align);
+
+ unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+ unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
+ TmpReg)
+ .addConstantPoolIndex(Idx).addReg(PPC::X2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg).addMemOperand(MMO);
+ } else {
+ // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
+ // But for large code model, we must generate a LDtocL followed
+ // by the LF[SD].
+ if (CModel == CodeModel::Large) {
+ unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
+ TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg2);
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
+ .addReg(TmpReg)
+ .addMemOperand(MMO);
+ }
+
+ return DestReg;
+}
+
+// Materialize the address of a global value into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+ assert(VT == MVT::i64 && "Non-address!");
+ const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
+ unsigned DestReg = createResultReg(RC);
+
+ // Global values may be plain old object addresses, TLS object
+ // addresses, constant pool entries, or jump tables. How we generate
+ // code for these may depend on small, medium, or large code model.
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ // FIXME: Jump tables are not yet required because fast-isel doesn't
+ // handle switches; if that changes, we need them as well. For now,
+ // what follows assumes everything's a generic (or TLS) global address.
+
+ // FIXME: We don't yet handle the complexity of TLS.
+ if (GV->isThreadLocal())
+ return 0;
+
+ // For small code model, generate a simple TOC load.
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
+ DestReg)
+ .addGlobalAddress(GV)
+ .addReg(PPC::X2);
+ else {
+ // If the address is an externally defined symbol, a symbol with common
+ // or externally available linkage, a non-local function address, or a
+ // jump table address (not yet needed), or if we are generating code
+ // for large code model, we generate:
+ // LDtocL(GV, ADDIStocHA(%X2, GV))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, GV), GV)
+ // Either way, start with the ADDIStocHA:
+ unsigned HighPartReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA),
+ HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
+
+ // If/when switches are implemented, jump tables should be handled
+ // on the "if" path here.
+ if (CModel == CodeModel::Large ||
+ (GV->getType()->getElementType()->isFunctionTy() &&
+ (GV->isDeclaration() || GV->isWeakForLinker())) ||
+ GV->isDeclaration() || GV->hasCommonLinkage() ||
+ GV->hasAvailableExternallyLinkage())
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
+ DestReg).addGlobalAddress(GV).addReg(HighPartReg);
+ else
+ // Otherwise generate the ADDItocL.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
+ DestReg).addReg(HighPartReg).addGlobalAddress(GV);
+ }
+
+ return DestReg;
+}
+
+// Materialize a 32-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ unsigned ResultReg = createResultReg(RC);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ if (isInt<16>(Imm))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
+ .addImm(Imm);
+ else if (Lo) {
+ // Both Lo and Hi have nonzero bits.
+ unsigned TmpReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
+ .addImm(Hi);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
+ .addReg(TmpReg).addImm(Lo);
+ } else
+ // Just Hi bits.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
+ .addImm(Hi);
+
+ return ResultReg;
+}
+
+// Materialize a 64-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned Remainder = 0;
+ unsigned Shift = 0;
+
+ // If the value doesn't fit in 32 bits, see if we can shift it
+ // so that it fits in 32 bits.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ if (isInt<32>(ImmSh))
+ Imm = ImmSh;
+ else {
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Handle the high-order 32 bits (if shifted) or the whole 32 bits
+ // (if not shifted).
+ unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
+ if (!Shift)
+ return TmpReg1;
+
+ // If upper 32 bits were not zero, we've built them and need to shift
+ // them into place.
+ unsigned TmpReg2;
+ if (Imm) {
+ TmpReg2 = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
+ TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
+ } else
+ TmpReg2 = TmpReg1;
+
+ unsigned TmpReg3, Hi, Lo;
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ TmpReg3 = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
+ TmpReg3).addReg(TmpReg2).addImm(Hi);
+ } else
+ TmpReg3 = TmpReg2;
+
+ if ((Lo = Remainder & 0xFFFF)) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
+ ResultReg).addReg(TmpReg3).addImm(Lo);
+ return ResultReg;
+ }
+
+ return TmpReg3;
+}
+
+
+// Materialize an integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+
+ // If the constant is in range, use a load-immediate.
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (isInt<16>(CI->getSExtValue())) {
+ unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
+ unsigned ImmReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
+ .addImm(CI->getSExtValue());
+ return ImmReg;
+ }
+
+ // Construct the constant piecewise.
+ int64_t Imm = CI->getZExtValue();
+
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else if (VT == MVT::i32)
+ return PPCMaterialize32BitInt(Imm, RC);
+
+ return 0;
+}
+
+// Materialize a constant into a register, and return the register
+// number (or zero if we failed to handle it).
+unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return PPCMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return PPCMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return PPCMaterializeInt(C, VT);
+
+ return 0;
+}
+
+// Materialize the address created by an alloca into a register, and
+// return the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(SI->second).addImm(0);
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+// Fold loads into extends when possible.
+// FIXME: We can have multiple redundant extend/trunc instructions
+// following a load. The folding only picks up one. Extend this
+// to check subsequent instructions for the same pattern and remove
+// them. Thus ResultReg should be the def reg for the last redundant
+// instruction in a chain, and all intervening instructions can be
+// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
+// to add ELF64-NOT: rldicl to the appropriate tests when this works.
+bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ bool IsZExt = false;
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+
+ case PPC::RLDICL:
+ case PPC::RLDICL_32_64: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 56) ||
+ (VT == MVT::i16 && MB <= 48) ||
+ (VT == MVT::i32 && MB <= 32))
+ break;
+ return false;
+ }
+
+ case PPC::RLWINM:
+ case PPC::RLWINM8: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 24) ||
+ (VT == MVT::i16 && MB <= 16))
+ break;
+ return false;
+ }
+
+ case PPC::EXTSB:
+ case PPC::EXTSB8:
+ case PPC::EXTSB8_32_64:
+ /* There is no sign-extending load-byte instruction. */
+ return false;
+
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64: {
+ if (VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64: {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+ }
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(LI->getOperand(0), Addr))
+ return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+
+ if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
+ return false;
+
+ MI->eraseFromParent();
+ return true;
+}
+
+// Attempt to lower call arguments in a faster way than done by
+// the selection DAG code.
+bool PPCFastISel::FastLowerArguments() {
+ // Defer to normal argument lowering for now. It's reasonably
+ // efficient. Consider doing something like ARM to handle the
+ // case where all args fit in registers, no varargs, no float
+ // or vector args.
+ return false;
+}
+
+// Handle materializing integer constants into a register. This is not
+// automatically generated for PowerPC, so must be explicitly created here.
+unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+
+ if (Opc != ISD::Constant)
+ return 0;
+
+ // If we're using CR bit registers for i1 values, handle that as a special
+ // case first.
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
+ unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
+ return ImmReg;
+ }
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else
+ return PPCMaterialize32BitInt(Imm, RC);
+}
+
+// Override for ADDI and ADDI8 to set the correct register class
+// on RHS operand 0. The automatic infrastructure naively assumes
+// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
+// for these cases. At the moment, none of the other automatically
+// generated RI instructions require special treatment. However, once
+// SelectSelect is implemented, "isel" requires similar handling.
+//
+// Also be conservative about the output register class. Avoid
+// assigning R0 or X0 to the output register for GPRC and G8RC
+// register classes, as any such result could be used in ADDI, etc.,
+// where those regs have another meaning.
+unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ if (MachineInstOpcode == PPC::ADDI)
+ MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ else if (MachineInstOpcode == PPC::ADDI8)
+ MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+ Op0, Op0IsKill, Imm);
+}
+
+// Override for instructions with one register operand to avoid use of
+// R0/X0. The automatic infrastructure isn't aware of the context so
+// we must be conservative.
+unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+}
+
+// Override for instructions with two register operands to avoid use
+// of R0/X0. The automatic infrastructure isn't aware of the context
+// so we must be conservative.
+unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ Op1, Op1IsKill);
+}
+
+namespace llvm {
+ // Create the fast instruction selector for PowerPC64 ELF.
+ FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) {
+ const TargetMachine &TM = FuncInfo.MF->getTarget();
+
+ // Only available on 64-bit ELF for now.
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
+ return new PPCFastISel(FuncInfo, LibInfo);
+
+ return nullptr;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
new file mode 100644
index 000000000000..b2577a9c7cf7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -0,0 +1,1604 @@
+//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PPC implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrBuilder.h"
+#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+
+/// VRRegNo - Map from a numbered VR register to its enum value.
+///
+static const uint16_t VRRegNo[] = {
+ PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
+ PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
+};
+
+PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
+ (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0),
+ Subtarget(STI) {}
+
+// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
+const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
+ unsigned &NumEntries) const {
+ if (Subtarget.isDarwinABI()) {
+ NumEntries = 1;
+ if (Subtarget.isPPC64()) {
+ static const SpillSlot darwin64Offsets = {PPC::X31, -8};
+ return &darwin64Offsets;
+ } else {
+ static const SpillSlot darwinOffsets = {PPC::R31, -4};
+ return &darwinOffsets;
+ }
+ }
+
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ NumEntries = 0;
+ return nullptr;
+ }
+
+ // Note that the offsets here overlap, but this is fixed up in
+ // processFunctionBeforeFrameFinalized.
+
+ static const SpillSlot Offsets[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::R31, -4},
+ {PPC::R30, -8},
+ {PPC::R29, -12},
+ {PPC::R28, -16},
+ {PPC::R27, -20},
+ {PPC::R26, -24},
+ {PPC::R25, -28},
+ {PPC::R24, -32},
+ {PPC::R23, -36},
+ {PPC::R22, -40},
+ {PPC::R21, -44},
+ {PPC::R20, -48},
+ {PPC::R19, -52},
+ {PPC::R18, -56},
+ {PPC::R17, -60},
+ {PPC::R16, -64},
+ {PPC::R15, -68},
+ {PPC::R14, -72},
+
+ // CR save area offset. We map each of the nonvolatile CR fields
+ // to the slot for CR2, which is the first of the nonvolatile CR
+ // fields to be assigned, so that we only allocate one save slot.
+ // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
+ {PPC::CR2, -4},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ static const SpillSlot Offsets64[] = {
+ // Floating-point register save area offsets.
+ {PPC::F31, -8},
+ {PPC::F30, -16},
+ {PPC::F29, -24},
+ {PPC::F28, -32},
+ {PPC::F27, -40},
+ {PPC::F26, -48},
+ {PPC::F25, -56},
+ {PPC::F24, -64},
+ {PPC::F23, -72},
+ {PPC::F22, -80},
+ {PPC::F21, -88},
+ {PPC::F20, -96},
+ {PPC::F19, -104},
+ {PPC::F18, -112},
+ {PPC::F17, -120},
+ {PPC::F16, -128},
+ {PPC::F15, -136},
+ {PPC::F14, -144},
+
+ // General register save area offsets.
+ {PPC::X31, -8},
+ {PPC::X30, -16},
+ {PPC::X29, -24},
+ {PPC::X28, -32},
+ {PPC::X27, -40},
+ {PPC::X26, -48},
+ {PPC::X25, -56},
+ {PPC::X24, -64},
+ {PPC::X23, -72},
+ {PPC::X22, -80},
+ {PPC::X21, -88},
+ {PPC::X20, -96},
+ {PPC::X19, -104},
+ {PPC::X18, -112},
+ {PPC::X17, -120},
+ {PPC::X16, -128},
+ {PPC::X15, -136},
+ {PPC::X14, -144},
+
+ // VRSAVE save area offset.
+ {PPC::VRSAVE, -4},
+
+ // Vector register save area
+ {PPC::V31, -16},
+ {PPC::V30, -32},
+ {PPC::V29, -48},
+ {PPC::V28, -64},
+ {PPC::V27, -80},
+ {PPC::V26, -96},
+ {PPC::V25, -112},
+ {PPC::V24, -128},
+ {PPC::V23, -144},
+ {PPC::V22, -160},
+ {PPC::V21, -176},
+ {PPC::V20, -192}};
+
+ if (Subtarget.isPPC64()) {
+ NumEntries = array_lengthof(Offsets64);
+
+ return Offsets64;
+ } else {
+ NumEntries = array_lengthof(Offsets);
+
+ return Offsets;
+ }
+}
+
+/// RemoveVRSaveCode - We have found that this function does not need any code
+/// to manipulate the VRSAVE register, even though it uses vector registers.
+/// This can happen when the only registers used are known to be live in or out
+/// of the function. Remove all of the VRSAVE related code from the function.
+/// FIXME: The removal of the code results in a compile failure at -O0 when the
+/// function contains a function call, as the GPR containing original VRSAVE
+/// contents is spilled and reloaded around the call. Without the prolog code,
+/// the spill instruction refers to an undefined register. This code needs
+/// to account for all uses of that GPR.
+static void RemoveVRSaveCode(MachineInstr *MI) {
+ MachineBasicBlock *Entry = MI->getParent();
+ MachineFunction *MF = Entry->getParent();
+
+ // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
+ MachineBasicBlock::iterator MBBI = MI;
+ ++MBBI;
+ assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
+ MBBI->eraseFromParent();
+
+ bool RemovedAllMTVRSAVEs = true;
+ // See if we can find and remove the MTVRSAVE instruction from all of the
+ // epilog blocks.
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
+ // If last instruction is a return instruction, add an epilogue
+ if (!I->empty() && I->back().isReturn()) {
+ bool FoundIt = false;
+ for (MBBI = I->end(); MBBI != I->begin(); ) {
+ --MBBI;
+ if (MBBI->getOpcode() == PPC::MTVRSAVE) {
+ MBBI->eraseFromParent(); // remove it.
+ FoundIt = true;
+ break;
+ }
+ }
+ RemovedAllMTVRSAVEs &= FoundIt;
+ }
+ }
+
+ // If we found and removed all MTVRSAVE instructions, remove the read of
+ // VRSAVE as well.
+ if (RemovedAllMTVRSAVEs) {
+ MBBI = MI;
+ assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
+ --MBBI;
+ assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
+ MBBI->eraseFromParent();
+ }
+
+ // Finally, nuke the UPDATE_VRSAVE.
+ MI->eraseFromParent();
+}
+
+// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
+// instruction selector. Based on the vector registers that have been used,
+// transform this into the appropriate ORI instruction.
+static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ unsigned UsedRegMask = 0;
+ for (unsigned i = 0; i != 32; ++i)
+ if (MF->getRegInfo().isPhysRegUsed(VRRegNo[i]))
+ UsedRegMask |= 1 << (31-i);
+
+ // Live in and live out values already must be in the mask, so don't bother
+ // marking them.
+ for (MachineRegisterInfo::livein_iterator
+ I = MF->getRegInfo().livein_begin(),
+ E = MF->getRegInfo().livein_end(); I != E; ++I) {
+ unsigned RegNo = TRI->getEncodingValue(I->first);
+ if (VRRegNo[RegNo] == I->first) // If this really is a vector reg.
+ UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
+ }
+
+ // Live out registers appear as use operands on return instructions.
+ for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
+ UsedRegMask != 0 && BI != BE; ++BI) {
+ const MachineBasicBlock &MBB = *BI;
+ if (MBB.empty() || !MBB.back().isReturn())
+ continue;
+ const MachineInstr &Ret = MBB.back();
+ for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = Ret.getOperand(I);
+ if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
+ continue;
+ unsigned RegNo = TRI->getEncodingValue(MO.getReg());
+ UsedRegMask &= ~(1 << (31-RegNo));
+ }
+ }
+
+ // If no registers are used, turn this into a copy.
+ if (UsedRegMask == 0) {
+ // Remove all VRSAVE code.
+ RemoveVRSaveCode(MI);
+ return;
+ }
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
+ } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+ } else {
+ if (DstReg != SrcReg)
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
+ else
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
+
+ BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
+ }
+
+ // Remove the old UPDATE_VRSAVE instruction.
+ MI->eraseFromParent();
+}
+
+static bool spillsCR(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isCRSpilled();
+}
+
+static bool spillsVRSAVE(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->isVRSAVESpilled();
+}
+
+static bool hasSpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasSpills();
+}
+
+static bool hasNonRISpills(const MachineFunction &MF) {
+ const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ return FuncInfo->hasNonRISpills();
+}
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF,
+ bool UseEstimate) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize =
+ UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
+
+ // Get stack alignments. The frame must be aligned to the greatest of these:
+ unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
+ unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame
+ unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+
+ // If we are a leaf function, and use up to 224 bytes of stack space,
+ // don't have a frame pointer, calls, or dynamic alloca then we do not need
+ // to adjust the stack pointer (we fit in the Red Zone).
+ // The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
+ // stackless code if all local vars are reg-allocated.
+ bool DisableRedZone = MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
+ if (!DisableRedZone &&
+ (Subtarget.isPPC64() || // 32-bit SVR4, no stack-
+ !Subtarget.isSVR4ABI() || // allocated locals.
+ FrameSize == 0) &&
+ FrameSize <= 224 && // Fits in red zone.
+ !MFI->hasVarSizedObjects() && // No dynamic alloca.
+ !MFI->adjustsStack() && // No calls.
+ !RegInfo->hasBasePointer(MF)) { // No special alignment.
+ // No need for frame
+ if (UpdateMF)
+ MFI->setStackSize(0);
+ return 0;
+ }
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // Maximum call frame needs to be at least big enough for linkage area.
+ unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(),
+ Subtarget.isDarwinABI(),
+ Subtarget.isELFv2ABI());
+ maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ if (UpdateMF)
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ if (UpdateMF)
+ MFI->setStackSize(FrameSize);
+
+ return FrameSize;
+}
+
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register.
+bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // FIXME: This is pretty much broken by design: hasFP() might be called really
+ // early, before the stack layout was calculated and thus hasFP() might return
+ // true or false here depending on the time of call.
+ return (MFI->getStackSize()) && needsFP(MF);
+}
+
+// needsFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Naked functions have no stack frame pushed, so we don't have a frame
+ // pointer.
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ return false;
+
+ return MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects() ||
+ (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ MF.getInfo<PPCFunctionInfo>()->hasFastCall());
+}
+
+void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
+ bool is31 = needsFP(MF);
+ unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
+ unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ bool HasBP = RegInfo->hasBasePointer(MF);
+ unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
+ unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI)
+ for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
+ --MBBI;
+ for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
+ MachineOperand &MO = MBBI->getOperand(I);
+ if (!MO.isReg())
+ continue;
+
+ switch (MO.getReg()) {
+ case PPC::FP:
+ MO.setReg(FPReg);
+ break;
+ case PPC::FP8:
+ MO.setReg(FP8Reg);
+ break;
+ case PPC::BP:
+ MO.setReg(BPReg);
+ break;
+ case PPC::BP8:
+ MO.setReg(BP8Reg);
+ break;
+
+ }
+ }
+ }
+}
+
+void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ DebugLoc dl;
+ bool needsFrameMoves = MMI.hasDebugInfo() ||
+ MF.getFunction()->needsUnwindTableEntry();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get the ABI.
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ assert((isDarwinABI || isSVR4ABI) &&
+ "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+
+ // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
+ // process it.
+ if (!isSVR4ABI)
+ for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
+ if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
+ HandleVRSaveUpdate(MBBI, TII);
+ break;
+ }
+ }
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ unsigned FrameSize = determineFrameLayout(MF);
+ int NegFrameSize = -FrameSize;
+ if (!isInt<32>(NegFrameSize))
+ llvm_unreachable("Unhandled stack size!");
+
+ if (MFI->isFrameAddressTaken())
+ replaceFPWithRealFP(MF);
+
+ // Check if the link register (LR) must be saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ // Do we have a frame pointer and/or base pointer for this function?
+ bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
+
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
+ const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
+ : PPC::MFLR );
+ const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
+ : PPC::STW );
+ const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
+ : PPC::STWU );
+ const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
+ : PPC::STWUX);
+ const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
+ : PPC::OR );
+ const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
+ : PPC::SUBFC);
+ const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
+ : PPC::SUBFIC);
+
+ // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
+ // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
+ // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
+ // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
+ assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
+ "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset =
+ PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ int BPOffset = 0;
+ if (HasBP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset =
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
+ }
+ }
+
+ // Get stack alignments.
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ if (HasBP && MaxAlign > 1)
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
+ bool isLargeFrame = !isInt<16>(NegFrameSize);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
+
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Prologue CR saving supported only in 64-bit mode");
+
+ if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ // FIXME: In the ELFv2 ABI, we are not required to save all CR fields.
+ // If only one or two CR fields are clobbered, it could be more
+ // efficient to use mfocrf to selectively save just those fields.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
+ }
+
+ if (HasFP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
+
+ if (HasBP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
+
+ if (MustSaveLR)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(8)
+ .addReg(SPReg);
+
+ // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
+ if (!FrameSize) return;
+
+ // Adjust stack pointer: r1 += NegFrameSize.
+ // If there is a preferred stack alignment, align R1 now
+
+ if (HasBP) {
+ // Save a copy of r1 as the base pointer.
+ BuildMI(MBB, MBBI, dl, OrInst, BPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+ }
+
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(32 - Log2_32(MaxAlign))
+ .addImm(31);
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
+ }
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
+
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ }
+
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
+ if (needsFrameMoves) {
+ // Show update of SP.
+ assert(NegFrameSize);
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ if (HasFP) {
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
+ if (HasBP) {
+ unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
+ if (MustSaveLR) {
+ unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+
+ // If there is a frame pointer, copy R1 into R31
+ if (HasFP) {
+ BuildMI(MBB, MBBI, dl, OrInst, FPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+
+ if (needsFrameMoves) {
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+
+ // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
+ // subregisters of CR2. We just need to emit a move of CR2.
+ if (PPC::CRBITRCRegClass.contains(Reg))
+ continue;
+
+ // For SVR4, don't emit a move for the CR spill slot if we haven't
+ // spilled CRs.
+ if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
+ && MustSaveCRs.empty())
+ continue;
+
+ // For 64-bit SVR4 when we have spilled CRs, the spill location
+ // is SP+8, not a frame-relative slot.
+ if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
+ // the whole CR word. In the ELFv2 ABI, every CR that was
+ // actually saved gets its own CFI record.
+ unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ continue;
+ }
+
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+ }
+}
+
+void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI != MBB.end() && "Returning block has no terminator");
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+
+ unsigned RetOpcode = MBBI->getOpcode();
+ DebugLoc dl;
+
+ assert((RetOpcode == PPC::BLR ||
+ RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8) &&
+ "Can only insert epilog into returning blocks");
+
+ // Get alignment info so we know how to restore the SP.
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes allocated from the FrameInfo.
+ int FrameSize = MFI->getStackSize();
+
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get the ABI.
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
+
+ // Check if the link register (LR) has been saved.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ bool MustSaveLR = FI->mustSaveLR();
+ const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ // Do we have a frame pointer and/or base pointer for this function?
+ bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
+
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = RegInfo->getBaseRegister(MF);
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
+ : PPC::MTLR );
+ const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
+ : PPC::LWZ );
+ const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
+ : PPC::ADDI );
+ const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
+ : PPC::ADD4 );
+
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+
+ int FPOffset = 0;
+ if (HasFP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int FPIndex = FI->getFramePointerSaveIndex();
+ assert(FPIndex && "No Frame Pointer Save Slot!");
+ FPOffset = FFI->getObjectOffset(FPIndex);
+ } else {
+ FPOffset =
+ PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
+ int BPOffset = 0;
+ if (HasBP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset =
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64,
+ isDarwinABI,
+ isPIC);
+ }
+ }
+
+ bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
+ RetOpcode == PPC::TCRETURNdi ||
+ RetOpcode == PPC::TCRETURNai ||
+ RetOpcode == PPC::TCRETURNri8 ||
+ RetOpcode == PPC::TCRETURNdi8 ||
+ RetOpcode == PPC::TCRETURNai8;
+
+ if (UsesTCRet) {
+ int MaxTCRetDelta = FI->getTailCallSPDelta();
+ MachineOperand &StackAdjust = MBBI->getOperand(1);
+ assert(StackAdjust.isImm() && "Expecting immediate value.");
+ // Adjust stack pointer.
+ int StackAdj = StackAdjust.getImm();
+ int Delta = StackAdj - MaxTCRetDelta;
+ assert((Delta >= 0) && "Delta must be positive");
+ if (MaxTCRetDelta>0)
+ FrameSize += (StackAdj +Delta);
+ else
+ FrameSize += StackAdj;
+ }
+
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple LD/LWZ immediate offset operand.
+ bool isLargeFrame = !isInt<16>(FrameSize);
+
+ if (FrameSize) {
+ // In the prologue, the loaded (or persistent) stack pointer value is offset
+ // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now.
+
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall()) {
+ assert(HasFP && "Expecting a valid frame pointer.");
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(FPReg).addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(FrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(FrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
+ .addReg(FPReg)
+ .addReg(ScratchReg);
+ }
+ } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg)
+ .addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
+ .addImm(0)
+ .addReg(SPReg);
+ }
+
+ }
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
+
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Epilogue CR restoring supported only in 64-bit mode");
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
+ .addImm(8)
+ .addReg(SPReg);
+
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
+
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ .addReg(TempReg, getKillRegState(i == e-1));
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
+
+ // Callee pop calling convention. Pop parameter/linkage area. Used for tail
+ // call optimization
+ if (MF.getTarget().Options.GuaranteedTailCallOpt && RetOpcode == PPC::BLR &&
+ MF.getFunction()->getCallingConv() == CallingConv::Fast) {
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned CallerAllocatedAmt = FI->getMinReservedArea();
+
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg).addImm(CallerAllocatedAmt);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(CallerAllocatedAmt >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(CallerAllocatedAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
+ .addReg(FPReg)
+ .addReg(ScratchReg);
+ }
+ } else if (RetOpcode == PPC::TCRETURNdi) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
+ } else if (RetOpcode == PPC::TCRETURNai) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
+ } else if (RetOpcode == PPC::TCRETURNdi8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
+ } else if (RetOpcode == PPC::TCRETURNri8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
+ } else if (RetOpcode == PPC::TCRETURNai8) {
+ MBBI = MBB.getLastNonDebugInstr();
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
+ }
+}
+
+/// MustSaveLR - Return true if this function requires that we save the LR
+/// register onto the stack in the prolog and restore it in the epilog of the
+/// function.
+static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
+ const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
+
+ // We need a save/restore of LR if there is any def of LR (which is
+ // defined by calls, including the PIC setup sequence), or if there is
+ // some use of the LR stack slot (e.g. for builtin_return_address).
+ // (LR comes in 32 and 64 bit versions.)
+ MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
+ return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
+}
+
+void
+PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *) const {
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+
+ // Save and clear the LR state.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ unsigned LR = RegInfo->getRARegister();
+ FI->setMustSaveLR(MustSaveLR(MF, LR));
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.setPhysRegUnused(LR);
+
+ // Save R31 if necessary
+ int FPSI = FI->getFramePointerSaveIndex();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isPIC = MF.getTarget().getRelocationModel() == Reloc::PIC_;
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI && needsFP(MF)) {
+ // Find out what the fix offset of the frame pointer save area.
+ int FPOffset = getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+
+ int BPSI = FI->getBasePointerSaveIndex();
+ if (!BPSI && RegInfo->hasBasePointer(MF)) {
+ int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI, isPIC);
+ // Allocate the frame index for the base pointer save area.
+ BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
+ // Save the result.
+ FI->setBasePointerSaveIndex(BPSI);
+ }
+
+ // Reserve stack space to move the linkage area to in case of a tail call.
+ int TCSPDelta = 0;
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
+ MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ }
+
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // function uses CR 2, 3, or 4.
+ if (!isPPC64 && !isDarwinABI &&
+ (MRI.isPhysRegUsed(PPC::CR2) ||
+ MRI.isPhysRegUsed(PPC::CR3) ||
+ MRI.isPhysRegUsed(PPC::CR4))) {
+ int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ FI->setCRSpillFrameIndex(FrameIdx);
+ }
+}
+
+void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Early exit if not using the SVR4 ABI.
+ if (!Subtarget.isSVR4ABI()) {
+ addScavengingSpillSlot(MF, RS);
+ return;
+ }
+
+ // Get callee saved register information.
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+
+ // Early exit if no callee saved registers are modified!
+ if (CSI.empty() && !needsFP(MF)) {
+ addScavengingSpillSlot(MF, RS);
+ return;
+ }
+
+ unsigned MinGPR = PPC::R31;
+ unsigned MinG8R = PPC::X31;
+ unsigned MinFPR = PPC::F31;
+ unsigned MinVR = PPC::V31;
+
+ bool HasGPSaveArea = false;
+ bool HasG8SaveArea = false;
+ bool HasFPSaveArea = false;
+ bool HasVRSAVESaveArea = false;
+ bool HasVRSaveArea = false;
+
+ SmallVector<CalleeSavedInfo, 18> GPRegs;
+ SmallVector<CalleeSavedInfo, 18> G8Regs;
+ SmallVector<CalleeSavedInfo, 18> FPRegs;
+ SmallVector<CalleeSavedInfo, 18> VRegs;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (PPC::GPRCRegClass.contains(Reg)) {
+ HasGPSaveArea = true;
+
+ GPRegs.push_back(CSI[i]);
+
+ if (Reg < MinGPR) {
+ MinGPR = Reg;
+ }
+ } else if (PPC::G8RCRegClass.contains(Reg)) {
+ HasG8SaveArea = true;
+
+ G8Regs.push_back(CSI[i]);
+
+ if (Reg < MinG8R) {
+ MinG8R = Reg;
+ }
+ } else if (PPC::F8RCRegClass.contains(Reg)) {
+ HasFPSaveArea = true;
+
+ FPRegs.push_back(CSI[i]);
+
+ if (Reg < MinFPR) {
+ MinFPR = Reg;
+ }
+ } else if (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)) {
+ ; // do nothing, as we already know whether CRs are spilled
+ } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
+ HasVRSAVESaveArea = true;
+ } else if (PPC::VRRCRegClass.contains(Reg)) {
+ HasVRSaveArea = true;
+
+ VRegs.push_back(CSI[i]);
+
+ if (Reg < MinVR) {
+ MinVR = Reg;
+ }
+ } else {
+ llvm_unreachable("Unknown RegisterClass!");
+ }
+ }
+
+ PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ int64_t LowerBound = 0;
+
+ // Take into account stack space reserved for tail calls.
+ int TCSPDelta = 0;
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
+ LowerBound = TCSPDelta;
+ }
+
+ // The Floating-point register save area is right below the back chain word
+ // of the previous stack frame.
+ if (HasFPSaveArea) {
+ for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
+ int FI = FPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
+ }
+
+ // Check whether the frame pointer register is allocated. If so, make sure it
+ // is spilled to the correct offset.
+ if (needsFP(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getFramePointerSaveIndex();
+ assert(FI && "No Frame Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (RegInfo->hasBasePointer(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getBasePointerSaveIndex();
+ assert(FI && "No Base Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // General register save area starts right below the Floating-point
+ // register save area.
+ if (HasGPSaveArea || HasG8SaveArea) {
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
+ int FI = GPRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ // Move general register save area spill slots down, taking into account
+ // the size of the Floating-point register save area.
+ for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
+ int FI = G8Regs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
+ unsigned MinReg =
+ std::min<unsigned>(TRI->getEncodingValue(MinGPR),
+ TRI->getEncodingValue(MinG8R));
+
+ if (Subtarget.isPPC64()) {
+ LowerBound -= (31 - MinReg + 1) * 8;
+ } else {
+ LowerBound -= (31 - MinReg + 1) * 4;
+ }
+ }
+
+ // For 32-bit only, the CR save area is below the general register
+ // save area. For 64-bit SVR4, the CR save area is addressed relative
+ // to the stack pointer and hence does not need an adjustment here.
+ // Only CR2 (the first nonvolatile spilled) has an associated frame
+ // index so that we have a single uniform save area.
+ if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
+ // Adjust the frame index of the CR spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
+ // Leave Darwin logic as-is.
+ || (!Subtarget.isSVR4ABI() &&
+ (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)))) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The CR save area is always 4 bytes long.
+ }
+
+ if (HasVRSAVESaveArea) {
+ // FIXME SVR4: Is it actually possible to have multiple elements in CSI
+ // which have the VRSAVE register class?
+ // Adjust the frame index of the VRSAVE spill slot.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ if (PPC::VRSAVERCRegClass.contains(Reg)) {
+ int FI = CSI[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
+ }
+
+ if (HasVRSaveArea) {
+ // Insert alignment padding, we need 16-byte alignment.
+ LowerBound = (LowerBound - 15) & ~(15);
+
+ for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
+ int FI = VRegs[i].getFrameIdx();
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+ }
+
+ addScavengingSpillSlot(MF, RS);
+}
+
+void
+PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
+ RegScavenger *RS) const {
+ // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
+ // a large stack, which will require scavenging a register to materialize a
+ // large offset.
+
+ // We need to have a scavenger spill slot for spills if the frame size is
+ // large. In case there is no free register for large-offset addressing,
+ // this slot is used for the necessary emergency spill. Also, we need the
+ // slot for dynamic stack allocations.
+
+ // The scavenger might be invoked if the frame offset does not fit into
+ // the 16-bit immediate. We don't know the complete frame size here
+ // because we've not yet computed callee-saved register spills or the
+ // needed alignment padding.
+ unsigned StackSize = determineFrameLayout(MF, false, true);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ // Might we have over-aligned allocas?
+ bool HasAlVars = MFI->hasVarSizedObjects() &&
+ MFI->getMaxAlignment() > getStackAlignment();
+
+ // These kinds of spills might need two registers.
+ if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
+ RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
+ }
+}
+
+bool
+PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+ // Return false otherwise to maintain pre-existing behavior.
+ if (!Subtarget.isSVR4ABI())
+ return false;
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ DebugLoc DL;
+ bool CRSpilled = false;
+ MachineInstrBuilder CRMIB;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ // Only Darwin actually uses the VRSAVE register, but it can still appear
+ // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
+ // Darwin, ignore it.
+ if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ continue;
+
+ // CR2 through CR4 are the nonvolatile CR fields.
+ bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
+
+ // Add the callee-saved register as live-in; it's killed at the spill.
+ MBB.addLiveIn(Reg);
+
+ if (CRSpilled && IsCRField) {
+ CRMIB.addReg(Reg, RegState::ImplicitKill);
+ continue;
+ }
+
+ // Insert the spill to the stack frame.
+ if (IsCRField) {
+ PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
+ if (Subtarget.isPPC64()) {
+ // The actual spill will happen at the start of the prologue.
+ FuncInfo->addMustSaveCR(Reg);
+ } else {
+ CRSpilled = true;
+ FuncInfo->setSpillsCR();
+
+ // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
+ // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
+ CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
+ .addReg(Reg, RegState::ImplicitKill);
+
+ MBB.insert(MI, CRMIB);
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
+ .addReg(PPC::R12,
+ getKillRegState(true)),
+ CSI[i].getFrameIdx()));
+ }
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
+ }
+ }
+ return true;
+}
+
+static void
+restoreCRs(bool isPPC64, bool is31,
+ bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ DebugLoc DL;
+ unsigned RestoreOp, MoveReg;
+
+ if (isPPC64)
+ // This is handled during epilogue generation.
+ return;
+ else {
+ // 32-bit: FP-relative
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
+ PPC::R12),
+ CSI[CSIIndex].getFrameIdx()));
+ RestoreOp = PPC::MTOCRF;
+ MoveReg = PPC::R12;
+ }
+
+ if (CR2Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
+ .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
+
+ if (CR3Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
+ .addReg(MoveReg, getKillRegState(!CR4Spilled)));
+
+ if (CR4Spilled)
+ MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
+ .addReg(MoveReg, getKillRegState(true)));
+}
+
+void PPCFrameLowering::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ if (MF.getTarget().Options.GuaranteedTailCallOpt &&
+ I->getOpcode() == PPC::ADJCALLSTACKUP) {
+ // Add (actually subtract) back the amount the callee popped on return.
+ if (int CalleeAmt = I->getOperand(1).getImm()) {
+ bool is64Bit = Subtarget.isPPC64();
+ CalleeAmt *= -1;
+ unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
+ unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
+ unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
+ unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
+ unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
+ unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
+ MachineInstr *MI = I;
+ DebugLoc dl = MI->getDebugLoc();
+
+ if (isInt<16>(CalleeAmt)) {
+ BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addImm(CalleeAmt);
+ } else {
+ MachineBasicBlock::iterator MBBI = I;
+ BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ .addImm(CalleeAmt >> 16);
+ BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
+ .addReg(TmpReg, RegState::Kill)
+ .addImm(CalleeAmt & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
+ .addReg(StackReg, RegState::Kill)
+ .addReg(TmpReg);
+ }
+ }
+ }
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+bool
+PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+
+ // Currently, this function only handles SVR4 32- and 64-bit ABIs.
+ // Return false otherwise to maintain pre-existing behavior.
+ if (!Subtarget.isSVR4ABI())
+ return false;
+
+ MachineFunction *MF = MBB.getParent();
+ const PPCInstrInfo &TII =
+ *static_cast<const PPCInstrInfo*>(MF->getTarget().getInstrInfo());
+ bool CR2Spilled = false;
+ bool CR3Spilled = false;
+ bool CR4Spilled = false;
+ unsigned CSIIndex = 0;
+
+ // Initialize insertion-point logic; we will be restoring in reverse
+ // order of spill.
+ MachineBasicBlock::iterator I = MI, BeforeI = I;
+ bool AtStart = I == MBB.begin();
+
+ if (!AtStart)
+ --BeforeI;
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+
+ // Only Darwin actually uses the VRSAVE register, but it can still appear
+ // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
+ // Darwin, ignore it.
+ if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ continue;
+
+ if (Reg == PPC::CR2) {
+ CR2Spilled = true;
+ // The spill slot is associated only with CR2, which is the
+ // first nonvolatile spilled. Save it here.
+ CSIIndex = i;
+ continue;
+ } else if (Reg == PPC::CR3) {
+ CR3Spilled = true;
+ continue;
+ } else if (Reg == PPC::CR4) {
+ CR4Spilled = true;
+ continue;
+ } else {
+ // When we first encounter a non-CR register after seeing at
+ // least one CR register, restore all spilled CRs together.
+ if ((CR2Spilled || CR3Spilled || CR4Spilled)
+ && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ bool is31 = needsFP(*MF);
+ restoreCRs(Subtarget.isPPC64(), is31,
+ CR2Spilled, CR3Spilled, CR4Spilled,
+ MBB, I, CSI, CSIIndex);
+ CR2Spilled = CR3Spilled = CR4Spilled = false;
+ }
+
+ // Default behavior for non-CR saves.
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ assert(I != MBB.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ }
+
+ // Insert in reverse order.
+ if (AtStart)
+ I = MBB.begin();
+ else {
+ I = BeforeI;
+ ++I;
+ }
+ }
+
+ // If we haven't yet spilled the CRs, do so now.
+ if (CR2Spilled || CR3Spilled || CR4Spilled) {
+ bool is31 = needsFP(*MF);
+ restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
+ MBB, I, CSI, CSIIndex);
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
new file mode 100644
index 000000000000..c0c7d248f8d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -0,0 +1,126 @@
+//===-- PPCFrameLowering.h - Define frame lowering for PowerPC --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_FRAMEINFO_H
+#define POWERPC_FRAMEINFO_H
+
+#include "PPC.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class PPCSubtarget;
+
+class PPCFrameLowering: public TargetFrameLowering {
+ const PPCSubtarget &Subtarget;
+
+public:
+ PPCFrameLowering(const PPCSubtarget &STI);
+
+ unsigned determineFrameLayout(MachineFunction &MF,
+ bool UpdateMF = true,
+ bool UseEstimate = false) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool needsFP(const MachineFunction &MF) const;
+ void replaceFPWithRealFP(MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF,
+ RegScavenger *RS = nullptr) const override;
+ void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
+
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// targetHandlesStackFrameRounding - Returns true if the target is
+ /// responsible for rounding up the stack frame (probably at emitPrologue
+ /// time).
+ bool targetHandlesStackFrameRounding() const override { return true; }
+
+ /// getReturnSaveOffset - Return the previous frame offset to save the
+ /// return address.
+ static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI)
+ return isPPC64 ? 16 : 8;
+ // SVR4 ABI:
+ return isPPC64 ? 16 : 4;
+ }
+
+ /// getTOCSaveOffset - Return the previous frame offset to save the
+ /// TOC register -- 64-bit SVR4 ABI only.
+ static unsigned getTOCSaveOffset(bool isELFv2ABI) {
+ return isELFv2ABI ? 24 : 40;
+ }
+
+ /// getFramePointerSaveOffset - Return the previous frame offset to save the
+ /// frame pointer.
+ static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ // For the Darwin ABI:
+ // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
+ // for saving the frame pointer (if needed.) While the published ABI has
+ // not used this slot since at least MacOSX 10.2, there is older code
+ // around that does use it, and that needs to continue to work.
+ if (isDarwinABI)
+ return isPPC64 ? -8U : -4U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -8U : -4U;
+ }
+
+ /// getBasePointerSaveOffset - Return the previous frame offset to save the
+ /// base pointer.
+ static unsigned getBasePointerSaveOffset(bool isPPC64,
+ bool isDarwinABI,
+ bool isPIC) {
+ if (isDarwinABI)
+ return isPPC64 ? -16U : -8U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -16U : isPIC ? -12U : -8U;
+ }
+
+ /// getLinkageSize - Return the size of the PowerPC ABI linkage area.
+ ///
+ static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI,
+ bool isELFv2ABI) {
+ if (isDarwinABI || isPPC64)
+ return (isELFv2ABI ? 4 : 6) * (isPPC64 ? 8 : 4);
+
+ // SVR4 ABI:
+ return 8;
+ }
+
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
new file mode 100644
index 000000000000..d9b242cad265
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -0,0 +1,433 @@
+//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCHazardRecognizers.h"
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "pre-RA-sched"
+
+bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
+ // FIXME: Move this.
+ if (isBCTRAfterSet(SU))
+ return true;
+
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->mayLoad())
+ return false;
+
+ // SU is a load; for any predecessors in this dispatch group, that are stores,
+ // and with which we have an ordering dependency, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || !PredMCID->mayStore())
+ continue;
+
+ if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (!MCID)
+ return false;
+
+ if (!MCID->isBranch())
+ return false;
+
+ // SU is a branch; for any predecessors in this dispatch group, with which we
+ // have a data dependence and set the counter register, return true.
+ for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
+ const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
+ if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
+ continue;
+
+ if (SU->Preds[i].isCtrl())
+ continue;
+
+ for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
+ if (SU->Preds[i].getSUnit() == CurGroup[j])
+ return true;
+ }
+
+ return false;
+}
+
+// FIXME: Remove this when we don't need this:
+namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }
+
+// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
+
+bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
+ unsigned &NSlots) {
+ // FIXME: Indirectly, this information is contained in the itinerary, and
+ // we should derive it from there instead of separately specifying it
+ // here.
+ unsigned IIC = MCID->getSchedClass();
+ switch (IIC) {
+ default:
+ NSlots = 1;
+ break;
+ case PPC::Sched::IIC_IntDivW:
+ case PPC::Sched::IIC_IntDivD:
+ case PPC::Sched::IIC_LdStLoadUpd:
+ case PPC::Sched::IIC_LdStLDU:
+ case PPC::Sched::IIC_LdStLFDU:
+ case PPC::Sched::IIC_LdStLFDUX:
+ case PPC::Sched::IIC_LdStLHA:
+ case PPC::Sched::IIC_LdStLHAU:
+ case PPC::Sched::IIC_LdStLWA:
+ case PPC::Sched::IIC_LdStSTDU:
+ case PPC::Sched::IIC_LdStSTFDU:
+ NSlots = 2;
+ break;
+ case PPC::Sched::IIC_LdStLoadUpdX:
+ case PPC::Sched::IIC_LdStLDUX:
+ case PPC::Sched::IIC_LdStLHAUX:
+ case PPC::Sched::IIC_LdStLWARX:
+ case PPC::Sched::IIC_LdStLDARX:
+ case PPC::Sched::IIC_LdStSTDUX:
+ case PPC::Sched::IIC_LdStSTDCX:
+ case PPC::Sched::IIC_LdStSTWCX:
+ case PPC::Sched::IIC_BrMCRX: // mtcr
+ // FIXME: Add sync/isync (here and in the itinerary).
+ NSlots = 4;
+ break;
+ }
+
+ // FIXME: record-form instructions need a different itinerary class.
+ if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
+ NSlots = 2;
+
+ switch (IIC) {
+ default:
+ // All multi-slot instructions must come first.
+ return NSlots > 1;
+ case PPC::Sched::IIC_BrCR: // cr logicals
+ case PPC::Sched::IIC_SprMFCR:
+ case PPC::Sched::IIC_SprMFCRF:
+ case PPC::Sched::IIC_SprMTSPR:
+ return true;
+ }
+}
+
+ScheduleHazardRecognizer::HazardType
+PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (Stalls == 0 && isLoadAfterStore(SU))
+ return NoopHazard;
+
+ return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
+}
+
+bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ unsigned NSlots;
+ if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
+ return true;
+
+ return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
+}
+
+unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ // We only need to fill out a maximum of 5 slots here: The 6th slot could
+ // only be a second branch, and otherwise the next instruction will start a
+ // new group.
+ if (isLoadAfterStore(SU) && CurSlots < 6) {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If we're using a special group-terminating nop, then we need only one.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8 )
+ return 1;
+
+ return 5 - CurSlots;
+ }
+
+ return ScoreboardHazardRecognizer::PreEmitNoops(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID) {
+ if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ DEBUG(dbgs() << "**** Adding to dispatch group: SU(" <<
+ SU->NodeNum << "): ");
+ DEBUG(DAG->dumpNode(SU));
+
+ unsigned NSlots;
+ bool MustBeFirst = mustComeFirst(MCID, NSlots);
+
+ // If this instruction must come first, but does not, then it starts a
+ // new group.
+ if (MustBeFirst && CurSlots) {
+ CurSlots = CurBranches = 0;
+ CurGroup.clear();
+ }
+
+ CurSlots += NSlots;
+ CurGroup.push_back(SU);
+
+ if (MCID->isBranch())
+ ++CurBranches;
+ }
+ }
+
+ return ScoreboardHazardRecognizer::EmitInstruction(SU);
+}
+
+void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
+ return ScoreboardHazardRecognizer::AdvanceCycle();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
+ llvm_unreachable("Bottom-up scheduling not supported");
+}
+
+void PPCDispatchGroupSBHazardRecognizer::Reset() {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ return ScoreboardHazardRecognizer::Reset();
+}
+
+void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+ // If the group has now filled all of its slots, or if we're using a special
+ // group-terminating nop, the group is complete.
+ if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
+ Directive == PPC::DIR_PWR8 || CurSlots == 6) {
+ CurGroup.clear();
+ CurSlots = CurBranches = 0;
+ } else {
+ CurGroup.push_back(nullptr);
+ ++CurSlots;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC 970 Hazard Recognizer
+//
+// This models the dispatch group formation of the PPC970 processor. Dispatch
+// groups are bundles of up to five instructions that can contain various mixes
+// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
+// branch instruction per-cycle.
+//
+// There are a number of restrictions to dispatch group formation: some
+// instructions can only be issued in the first slot of a dispatch group, & some
+// instructions fill an entire dispatch group. Additionally, only branches can
+// issue in the 5th (last) slot.
+//
+// Finally, there are a number of "structural" hazards on the PPC970. These
+// conditions cause large performance penalties due to misprediction, recovery,
+// and replay logic that has to happen. These cases include setting a CTR and
+// branching through it in the same dispatch group, and storing to an address,
+// then loading from the same address within a dispatch group. To avoid these
+// conditions, we insert no-op instructions when appropriate.
+//
+// FIXME: This is missing some significant cases:
+// 1. Modeling of microcoded instructions.
+// 2. Handling of serialized operations.
+// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
+//
+
+PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
+ : DAG(DAG) {
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::EndDispatchGroup() {
+ DEBUG(errs() << "=== Start of dispatch group\n");
+ NumIssued = 0;
+
+ // Structural hazard info.
+ HasCTRSet = false;
+ NumStores = 0;
+}
+
+
+PPCII::PPC970_Unit
+PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,
+ bool &isCracked,
+ bool &isLoad, bool &isStore) {
+ const MCInstrDesc &MCID = DAG.TII->get(Opcode);
+
+ isLoad = MCID.mayLoad();
+ isStore = MCID.mayStore();
+
+ uint64_t TSFlags = MCID.TSFlags;
+
+ isFirst = TSFlags & PPCII::PPC970_First;
+ isSingle = TSFlags & PPCII::PPC970_Single;
+ isCracked = TSFlags & PPCII::PPC970_Cracked;
+ return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
+}
+
+/// isLoadOfStoredAddress - If we have a load from the previously stored pointer
+/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
+bool PPCHazardRecognizer970::
+isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const {
+ for (unsigned i = 0, e = NumStores; i != e; ++i) {
+ // Handle exact and commuted addresses.
+ if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
+ return true;
+
+ // Okay, we don't have an exact match, if this is an indexed offset, see if
+ // we have overlap (which happens during fp->int conversion for example).
+ if (StoreValue[i] == LoadValue) {
+ // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
+ // to see if the load and store actually overlap.
+ if (StoreOffset[i] < LoadOffset) {
+ if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
+ } else {
+ if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// getHazardType - We return hazard for any non-branch instruction that would
+/// terminate the dispatch group. We turn NoopHazard for any
+/// instructions that wouldn't terminate the dispatch group that would cause a
+/// pipeline flush.
+ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
+getHazardType(SUnit *SU, int Stalls) {
+ assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
+
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return NoHazard;
+
+ unsigned Opcode = MI->getOpcode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
+
+ // We can only issue a PPC970_First/PPC970_Single instruction (such as
+ // crand/mtspr/etc) if this is the first cycle of the dispatch group.
+ if (NumIssued != 0 && (isFirst || isSingle))
+ return Hazard;
+
+ // If this instruction is cracked into two ops by the decoder, we know that
+ // it is not a branch and that it cannot issue if 3 other instructions are
+ // already in the dispatch group.
+ if (isCracked && NumIssued > 2)
+ return Hazard;
+
+ switch (InstrType) {
+ default: llvm_unreachable("Unknown instruction type!");
+ case PPCII::PPC970_FXU:
+ case PPCII::PPC970_LSU:
+ case PPCII::PPC970_FPU:
+ case PPCII::PPC970_VALU:
+ case PPCII::PPC970_VPERM:
+ // We can only issue a branch as the last instruction in a group.
+ if (NumIssued == 4) return Hazard;
+ break;
+ case PPCII::PPC970_CRU:
+ // We can only issue a CR instruction in the first two slots.
+ if (NumIssued >= 2) return Hazard;
+ break;
+ case PPCII::PPC970_BRU:
+ break;
+ }
+
+ // Do not allow MTCTR and BCTRL to be in the same dispatch group.
+ if (HasCTRSet && Opcode == PPC::BCTRL)
+ return NoopHazard;
+
+ // If this is a load following a store, make sure it's not to the same or
+ // overlapping address.
+ if (isLoad && NumStores && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ if (isLoadOfStoredAddress(MO->getSize(),
+ MO->getOffset(), MO->getValue()))
+ return NoopHazard;
+ }
+
+ return NoHazard;
+}
+
+void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+
+ if (MI->isDebugValue())
+ return;
+
+ unsigned Opcode = MI->getOpcode();
+ bool isFirst, isSingle, isCracked, isLoad, isStore;
+ PPCII::PPC970_Unit InstrType =
+ GetInstrType(Opcode, isFirst, isSingle, isCracked,
+ isLoad, isStore);
+ if (InstrType == PPCII::PPC970_Pseudo) return;
+
+ // Update structural hazard information.
+ if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
+
+ // Track the address stored to.
+ if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
+ MachineMemOperand *MO = *MI->memoperands_begin();
+ StoreSize[NumStores] = MO->getSize();
+ StoreOffset[NumStores] = MO->getOffset();
+ StoreValue[NumStores] = MO->getValue();
+ ++NumStores;
+ }
+
+ if (InstrType == PPCII::PPC970_BRU || isSingle)
+ NumIssued = 4; // Terminate a d-group.
+ ++NumIssued;
+
+ // If this instruction is cracked into two ops by the decoder, remember that
+ // we issued two pieces.
+ if (isCracked)
+ ++NumIssued;
+
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::AdvanceCycle() {
+ assert(NumIssued < 5 && "Illegal dispatch group!");
+ ++NumIssued;
+ if (NumIssued == 5)
+ EndDispatchGroup();
+}
+
+void PPCHazardRecognizer970::Reset() {
+ EndDispatchGroup();
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
new file mode 100644
index 000000000000..23f76c16d138
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -0,0 +1,102 @@
+//===-- PPCHazardRecognizers.h - PowerPC Hazard Recognizers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on PowerPC processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCHAZRECS_H
+#define PPCHAZRECS_H
+
+#include "PPCInstrInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+/// PPCDispatchGroupSBHazardRecognizer - This class implements a scoreboard-based
+/// hazard recognizer for PPC ooo processors with dispatch-group hazards.
+class PPCDispatchGroupSBHazardRecognizer : public ScoreboardHazardRecognizer {
+ const ScheduleDAG *DAG;
+ SmallVector<SUnit *, 7> CurGroup;
+ unsigned CurSlots, CurBranches;
+
+ bool isLoadAfterStore(SUnit *SU);
+ bool isBCTRAfterSet(SUnit *SU);
+ bool mustComeFirst(const MCInstrDesc *MCID, unsigned &NSlots);
+public:
+ PPCDispatchGroupSBHazardRecognizer(const InstrItineraryData *ItinData,
+ const ScheduleDAG *DAG_) :
+ ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
+ CurSlots(0), CurBranches(0) {}
+
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ bool ShouldPreferAnother(SUnit* SU) override;
+ unsigned PreEmitNoops(SUnit *SU) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+ void Reset() override;
+ void EmitNoop() override;
+};
+
+/// PPCHazardRecognizer970 - This class defines a finite state automata that
+/// models the dispatch logic on the PowerPC 970 (aka G5) processor. This
+/// promotes good dispatch group formation and implements noop insertion to
+/// avoid structural hazards that cause significant performance penalties (e.g.
+/// setting the CTR register then branching through it within a dispatch group),
+/// or storing then loading from the same address within a dispatch group.
+class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
+ const ScheduleDAG &DAG;
+
+ unsigned NumIssued; // Number of insts issued, including advanced cycles.
+
+ // Various things that can cause a structural hazard.
+
+ // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
+ bool HasCTRSet;
+
+ // StoredPtr - Keep track of the address of any store. If we see a load from
+ // the same address (or one that aliases it), disallow the store. We can have
+ // up to four stores in one dispatch group, hence we track up to 4.
+ //
+ // This is null if we haven't seen a store yet. We keep track of both
+ // operands of the store here, since we support [r+r] and [r+i] addressing.
+ const Value *StoreValue[4];
+ int64_t StoreOffset[4];
+ uint64_t StoreSize[4];
+ unsigned NumStores;
+
+public:
+ PPCHazardRecognizer970(const ScheduleDAG &DAG);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
+ virtual void EmitInstruction(SUnit *SU) override;
+ virtual void AdvanceCycle() override;
+ virtual void Reset() override;
+
+private:
+ /// EndDispatchGroup - Called when we are finishing a new dispatch group.
+ ///
+ void EndDispatchGroup();
+
+ /// GetInstrType - Classify the specified powerpc opcode according to its
+ /// pipeline.
+ PPCII::PPC970_Unit GetInstrType(unsigned Opcode,
+ bool &isFirst, bool &isSingle,bool &isCracked,
+ bool &isLoad, bool &isStore);
+
+ bool isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
+ const Value *LoadValue) const;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
new file mode 100644
index 000000000000..490f6d2bcd4d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -0,0 +1,2205 @@
+//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for PowerPC,
+// converting from a legalized dag to a PPC dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-codegen"
+
+// FIXME: Remove this once the bug has been fixed!
+cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
+cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
+
+namespace llvm {
+ void initializePPCDAGToDAGISelPass(PassRegistry&);
+}
+
+namespace {
+ //===--------------------------------------------------------------------===//
+ /// PPCDAGToDAGISel - PPC specific code to select PPC machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class PPCDAGToDAGISel : public SelectionDAGISel {
+ const PPCTargetMachine &TM;
+ const PPCTargetLowering *PPCLowering;
+ const PPCSubtarget *PPCSubTarget;
+ unsigned GlobalBaseReg;
+ public:
+ explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
+ : SelectionDAGISel(tm), TM(tm),
+ PPCLowering(TM.getTargetLowering()),
+ PPCSubTarget(TM.getSubtargetImpl()) {
+ initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ PPCLowering = TM.getTargetLowering();
+ PPCSubTarget = TM.getSubtargetImpl();
+ SelectionDAGISel::runOnMachineFunction(MF);
+
+ if (!PPCSubTarget->isSVR4ABI())
+ InsertVRSaveCode(MF);
+
+ return true;
+ }
+
+ void PostprocessISelDAG() override;
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getI64Imm - Return a target constant with the specified value, of type
+ /// i64.
+ inline SDValue getI64Imm(uint64_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i64);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
+ }
+
+ /// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
+ /// with any number of 0s on either side. The 1s are allowed to wrap from
+ /// LSB to MSB, so 0x000FFF0, 0x0000FFFF, and 0xFF0000FF are all runs.
+ /// 0x0F0F0000 is not, since all 1s are not contiguous.
+ static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME);
+
+
+ /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
+ /// rotate and mask opcode and mask operation.
+ static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
+ unsigned &SH, unsigned &MB, unsigned &ME);
+
+ /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
+ /// base register. Return the virtual register that holds this value.
+ SDNode *getGlobalBaseReg();
+
+ // Select - Convert the specified operand from a target-independent to a
+ // target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N) override;
+
+ SDNode *SelectBitfieldInsert(SDNode *N);
+
+ /// SelectCC - Select a comparison of the specified values with the
+ /// specified condition code, returning the CR# of the expression.
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl);
+
+ /// SelectAddrImm - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement [r+imm].
+ bool SelectAddrImm(SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
+ }
+
+ /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
+ /// immediate field. Note that the operand at this point is already the
+ /// result of a prior SelectAddressRegImm call.
+ bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
+ if (N.getOpcode() == ISD::TargetConstant ||
+ N.getOpcode() == ISD::TargetGlobalAddress) {
+ Out = N;
+ return true;
+ }
+
+ return false;
+ }
+
+ /// SelectAddrIdx - Given the specified addressed, check to see if it can be
+ /// represented as an indexed [r+r] operation. Returns false if it can
+ /// be represented by [r+imm], which are preferred.
+ bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrIdxOnly - Given the specified addressed, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
+ return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ }
+
+ /// SelectAddrImmX4 - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement that is a multiple of 4.
+ /// Suitable for use by STD and friends.
+ bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
+ }
+
+ // Select an address into a single register.
+ bool SelectAddr(SDValue N, SDValue &Base) {
+ Base = N;
+ return true;
+ }
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions. It is always correct to compute the value into
+ /// a register. The case of adding a (possibly relocatable) constant to a
+ /// register can be improved, but it is wrong to substitute Reg+Reg for
+ /// Reg in an asm, because the load or store opcode would have to change.
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override {
+ OutOps.push_back(Op);
+ return false;
+ }
+
+ void InsertVRSaveCode(MachineFunction &MF);
+
+ const char *getPassName() const override {
+ return "PowerPC DAG->DAG Pattern Instruction Selection";
+ }
+
+// Include the pieces autogenerated from the target description.
+#include "PPCGenDAGISel.inc"
+
+private:
+ SDNode *SelectSETCC(SDNode *N);
+
+ void PeepholePPC64();
+ void PeepholeCROps();
+
+ bool AllUsersSelectZero(SDNode *N);
+ void SwapAllSelectUsers(SDNode *N);
+ };
+}
+
+/// InsertVRSaveCode - Once the entire function has been instruction selected,
+/// all virtual registers are created and all machine instructions are built,
+/// check to see if we need to save/restore VRSAVE. If so, do it.
+void PPCDAGToDAGISel::InsertVRSaveCode(MachineFunction &Fn) {
+ // Check to see if this function uses vector registers, which means we have to
+ // save and restore the VRSAVE register and update it with the regs we use.
+ //
+ // In this case, there will be virtual registers of vector type created
+ // by the scheduler. Detect them now.
+ bool HasVectorVReg = false;
+ for (unsigned i = 0, e = RegInfo->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (RegInfo->getRegClass(Reg) == &PPC::VRRCRegClass) {
+ HasVectorVReg = true;
+ break;
+ }
+ }
+ if (!HasVectorVReg) return; // nothing to do.
+
+ // If we have a vector register, we want to emit code into the entry and exit
+ // blocks to save and restore the VRSAVE register. We do this here (instead
+ // of marking all vector instructions as clobbering VRSAVE) for two reasons:
+ //
+ // 1. This (trivially) reduces the load on the register allocator, by not
+ // having to represent the live range of the VRSAVE register.
+ // 2. This (more significantly) allows us to create a temporary virtual
+ // register to hold the saved VRSAVE value, allowing this temporary to be
+ // register allocated, instead of forcing it to be spilled to the stack.
+
+ // Create two vregs - one to hold the VRSAVE register that is live-in to the
+ // function and one for the value after having bits or'd into it.
+ unsigned InVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ unsigned UpdatedVRSAVE = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ MachineBasicBlock &EntryBB = *Fn.begin();
+ DebugLoc dl;
+ // Emit the following code into the entry block:
+ // InVRSAVE = MFVRSAVE
+ // UpdatedVRSAVE = UPDATE_VRSAVE InVRSAVE
+ // MTVRSAVE UpdatedVRSAVE
+ MachineBasicBlock::iterator IP = EntryBB.begin(); // Insert Point
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MFVRSAVE), InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::UPDATE_VRSAVE),
+ UpdatedVRSAVE).addReg(InVRSAVE);
+ BuildMI(EntryBB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(UpdatedVRSAVE);
+
+ // Find all return blocks, outputting a restore in each epilog.
+ for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ if (!BB->empty() && BB->back().isReturn()) {
+ IP = BB->end(); --IP;
+
+ // Skip over all terminator instructions, which are part of the return
+ // sequence.
+ MachineBasicBlock::iterator I2 = IP;
+ while (I2 != BB->begin() && (--I2)->isTerminator())
+ IP = I2;
+
+ // Emit: MTVRSAVE InVRSave
+ BuildMI(*BB, IP, dl, TII.get(PPC::MTVRSAVE)).addReg(InVRSAVE);
+ }
+ }
+}
+
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// base address to use for accessing globals into a register.
+///
+SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
+ if (!GlobalBaseReg) {
+ const TargetInstrInfo &TII = *TM.getInstrInfo();
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc dl;
+
+ if (PPCLowering->getPointerTy() == MVT::i32) {
+ if (PPCSubTarget->isTargetELF())
+ GlobalBaseReg = PPC::R30;
+ else
+ GlobalBaseReg =
+ RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
+ if (PPCSubTarget->isTargetELF()) {
+ unsigned TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::GetGBRO), TempReg).addReg(GlobalBaseReg);
+ BuildMI(FirstMBB, MBBI, dl,
+ TII.get(PPC::UpdateGBR)).addReg(GlobalBaseReg).addReg(TempReg);
+ MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
+ }
+ } else {
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
+ BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
+ }
+ }
+ return CurDAG->getRegister(GlobalBaseReg,
+ PPCLowering->getPointerTy()).getNode();
+}
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (N->getOpcode() != ISD::Constant)
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
+/// operand. If so Imm will receive the 64-bit value.
+static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
+ if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
+ Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ return true;
+ }
+ return false;
+}
+
+// isInt32Immediate - This method tests to see if a constant operand.
+// If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+ return isInt32Immediate(N.getNode(), Imm);
+}
+
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has a immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+ return N->getOpcode() == Opc
+ && isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
+ if (isShiftedMask_32(Val)) {
+ // look for the first non-zero bit
+ MB = countLeadingZeros(Val);
+ // look for the first zero bit after the run of ones
+ ME = countLeadingZeros((Val - 1) ^ Val);
+ return true;
+ } else {
+ Val = ~Val; // invert mask
+ if (isShiftedMask_32(Val)) {
+ // effectively look for the first zero bit
+ ME = countLeadingZeros(Val) - 1;
+ // effectively look for the first one bit after the run of zeros
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ return true;
+ }
+ }
+ // no run present
+ return false;
+}
+
+bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
+ bool isShiftMask, unsigned &SH,
+ unsigned &MB, unsigned &ME) {
+ // Don't even go down this path for i64, since different logic will be
+ // necessary for rldicl/rldicr/rldimi.
+ if (N->getValueType(0) != MVT::i32)
+ return false;
+
+ unsigned Shift = 32;
+ unsigned Indeterminant = ~0; // bit mask marking indeterminant results
+ unsigned Opcode = N->getOpcode();
+ if (N->getNumOperands() != 2 ||
+ !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
+ return false;
+
+ if (Opcode == ISD::SHL) {
+ // apply shift left to mask if it comes first
+ if (isShiftMask) Mask = Mask << Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu << Shift);
+ } else if (Opcode == ISD::SRL) {
+ // apply shift right to mask if it comes first
+ if (isShiftMask) Mask = Mask >> Shift;
+ // determine which bits are made indeterminant by shift
+ Indeterminant = ~(0xFFFFFFFFu >> Shift);
+ // adjust for the left rotate
+ Shift = 32 - Shift;
+ } else if (Opcode == ISD::ROTL) {
+ Indeterminant = 0;
+ } else {
+ return false;
+ }
+
+ // if the mask doesn't intersect any Indeterminant bits
+ if (Mask && !(Mask & Indeterminant)) {
+ SH = Shift & 31;
+ // make sure the mask is still a mask (wrap arounds may not be)
+ return isRunOfOnes(Mask, MB, ME);
+ }
+ return false;
+}
+
+/// SelectBitfieldInsert - turn an or of two masked values into
+/// the rotate left word immediate then mask insert (rlwimi) instruction.
+SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDLoc dl(N);
+
+ APInt LKZ, LKO, RKZ, RKO;
+ CurDAG->computeKnownBits(Op0, LKZ, LKO);
+ CurDAG->computeKnownBits(Op1, RKZ, RKO);
+
+ unsigned TargetMask = LKZ.getZExtValue();
+ unsigned InsertMask = RKZ.getZExtValue();
+
+ if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
+ unsigned Op0Opc = Op0.getOpcode();
+ unsigned Op1Opc = Op1.getOpcode();
+ unsigned Value, SH = 0;
+ TargetMask = ~TargetMask;
+ InsertMask = ~InsertMask;
+
+ // If the LHS has a foldable shift and the RHS does not, then swap it to the
+ // RHS so that we can fold the shift into the insert.
+ if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
+ if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
+ Op0.getOperand(0).getOpcode() == ISD::SRL) {
+ if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+ } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
+ if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
+ Op1.getOperand(0).getOpcode() != ISD::SRL) {
+ std::swap(Op0, Op1);
+ std::swap(Op0Opc, Op1Opc);
+ std::swap(TargetMask, InsertMask);
+ }
+ }
+
+ unsigned MB, ME;
+ if (isRunOfOnes(InsertMask, MB, ME)) {
+ SDValue Tmp1, Tmp2;
+
+ if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
+ isInt32Immediate(Op1.getOperand(1), Value)) {
+ Op1 = Op1.getOperand(0);
+ SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
+ }
+ if (Op1Opc == ISD::AND) {
+ // The AND mask might not be a constant, and we need to make sure that
+ // if we're going to fold the masking with the insert, all bits not
+ // know to be zero in the mask are known to be one.
+ APInt MKZ, MKO;
+ CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
+ bool CanFoldMask = InsertMask == MKO.getZExtValue();
+
+ unsigned SHOpc = Op1.getOperand(0).getOpcode();
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
+ isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ // Note that Value must be in range here (less than 32) because
+ // otherwise there would not be any bits set in InsertMask.
+ Op1 = Op1.getOperand(0).getOperand(0);
+ SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
+ }
+ }
+
+ SH &= 31;
+ SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB),
+ getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
+ }
+ }
+ return nullptr;
+}
+
+/// SelectCC - Select a comparison of the specified values with the specified
+/// condition code, returning the CR# of the expression.
+SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDLoc dl) {
+ // Always select the LHS.
+ unsigned Opc;
+
+ if (LHS.getValueType() == MVT::i32) {
+ unsigned Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt32Immediate(RHS, Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>((int)Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpw cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmplwi cr0,r0,0x5678
+ // beq cr0,L6
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
+ getI32Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ }
+ Opc = PPC::CMPLW;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLW;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
+ getI32Imm((int)SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPW;
+ }
+ } else if (LHS.getValueType() == MVT::i64) {
+ uint64_t Imm;
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ if (isInt64Immediate(RHS.getNode(), Imm)) {
+ // SETEQ/SETNE comparison with 16-bit immediate, fold it.
+ if (isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+ // If this is a 16-bit signed immediate, fold it.
+ if (isInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI32Imm(Imm & 0xFFFF)), 0);
+
+ // For non-equality comparisons, the default code would materialize the
+ // constant, then compare against it, like this:
+ // lis r2, 4660
+ // ori r2, r2, 22136
+ // cmpd cr0, r3, r2
+ // Since we are just comparing for equality, we can emit this instead:
+ // xoris r0,r3,0x1234
+ // cmpldi cr0,r0,0x5678
+ // beq cr0,L6
+ if (isUInt<32>(Imm)) {
+ SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
+ getI64Imm(Imm >> 16)), 0);
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ }
+ }
+ Opc = PPC::CMPLD;
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
+ getI64Imm(Imm & 0xFFFF)), 0);
+ Opc = PPC::CMPLD;
+ } else {
+ short SImm;
+ if (isIntS16Immediate(RHS, SImm))
+ return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
+ getI64Imm(SImm & 0xFFFF)),
+ 0);
+ Opc = PPC::CMPD;
+ }
+ } else if (LHS.getValueType() == MVT::f32) {
+ Opc = PPC::FCMPUS;
+ } else {
+ assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
+ Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
+ }
+ return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
+}
+
+static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
+ switch (CC) {
+ case ISD::SETUEQ:
+ case ISD::SETONE:
+ case ISD::SETOLE:
+ case ISD::SETOGE:
+ llvm_unreachable("Should be lowered by legalize!");
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return PPC::PRED_EQ;
+ case ISD::SETUNE:
+ case ISD::SETNE: return PPC::PRED_NE;
+ case ISD::SETOLT:
+ case ISD::SETLT: return PPC::PRED_LT;
+ case ISD::SETULE:
+ case ISD::SETLE: return PPC::PRED_LE;
+ case ISD::SETOGT:
+ case ISD::SETGT: return PPC::PRED_GT;
+ case ISD::SETUGE:
+ case ISD::SETGE: return PPC::PRED_GE;
+ case ISD::SETO: return PPC::PRED_NU;
+ case ISD::SETUO: return PPC::PRED_UN;
+ // These two are invalid for floating point. Assume we have int.
+ case ISD::SETULT: return PPC::PRED_LT;
+ case ISD::SETUGT: return PPC::PRED_GT;
+ }
+}
+
+/// getCRIdxForSetCC - Return the index of the condition register field
+/// associated with the SetCC condition, and whether or not the field is
+/// treated as inverted. That is, lt = 0; ge = 0 inverted.
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
+ Invert = false;
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition!");
+ case ISD::SETOLT:
+ case ISD::SETLT: return 0; // Bit #0 = SETOLT
+ case ISD::SETOGT:
+ case ISD::SETGT: return 1; // Bit #1 = SETOGT
+ case ISD::SETOEQ:
+ case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
+ case ISD::SETUO: return 3; // Bit #3 = SETUO
+ case ISD::SETUGE:
+ case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
+ case ISD::SETULE:
+ case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
+ case ISD::SETUNE:
+ case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
+ case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
+ case ISD::SETUEQ:
+ case ISD::SETOGE:
+ case ISD::SETOLE:
+ case ISD::SETONE:
+ llvm_unreachable("Invalid branch code: should be expanded by legalize");
+ // These are invalid for floating point. Assume integer.
+ case ISD::SETULT: return 0;
+ case ISD::SETUGT: return 1;
+ }
+}
+
+// getVCmpInst: return the vector compare instruction for the specified
+// vector type and condition code. Since this is for altivec specific code,
+// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+ bool HasVSX, bool &Swap, bool &Negate) {
+ Swap = false;
+ Negate = false;
+
+ if (VecVT.isFloatingPoint()) {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+ case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+ case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPEQDP;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGTDP;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ if (VecVT == MVT::v4f32)
+ return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+ else if (VecVT == MVT::v2f64)
+ return PPC::XVCMPGEDP;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid floating-point vector compare condition");
+ } else {
+ /* Handle some cases by swapping input operands. */
+ switch (CC) {
+ case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+ case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+ case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+ case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+ default: break;
+ }
+ /* Handle some cases by negating the result. */
+ switch (CC) {
+ case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+ case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+ case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+ case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+ default: break;
+ }
+ /* We have instructions implementing the remaining cases. */
+ switch (CC) {
+ case ISD::SETEQ:
+ case ISD::SETUEQ:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPEQUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPEQUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPEQUW;
+ break;
+ case ISD::SETGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTSB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTSH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTSW;
+ break;
+ case ISD::SETUGT:
+ if (VecVT == MVT::v16i8)
+ return PPC::VCMPGTUB;
+ else if (VecVT == MVT::v8i16)
+ return PPC::VCMPGTUH;
+ else if (VecVT == MVT::v4i32)
+ return PPC::VCMPGTUW;
+ break;
+ default:
+ break;
+ }
+ llvm_unreachable("Invalid integer vector compare condition");
+ }
+}
+
+SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
+ SDLoc dl(N);
+ unsigned Imm;
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ if (!PPCSubTarget->useCRBits() &&
+ isInt32Immediate(N->getOperand(1), Imm)) {
+ // We can codegen setcc op, imm very efficiently compared to a brcond.
+ // Check for those cases here.
+ // setcc op, 0
+ if (Imm == 0) {
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ: {
+ Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
+ SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ SDValue AD =
+ SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op,
+ AD.getValue(1));
+ }
+ case ISD::SETLT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ case ISD::SETGT: {
+ SDValue T =
+ SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
+ T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
+ SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ }
+ } else if (Imm == ~0U) { // setcc op, -1
+ SDValue Op = N->getOperand(0);
+ switch (CC) {
+ default: break;
+ case ISD::SETEQ:
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(1)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(CurDAG->getMachineNode(PPC::LI, dl,
+ MVT::i32,
+ getI32Imm(0)), 0),
+ Op.getValue(1));
+ case ISD::SETNE: {
+ if (isPPC64) break;
+ Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
+ SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ Op, getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0),
+ Op, SDValue(AD, 1));
+ }
+ case ISD::SETLT: {
+ SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
+ getI32Imm(1)), 0);
+ SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
+ Op), 0);
+ SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ case ISD::SETGT: {
+ SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
+ Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op,
+ getI32Imm(1));
+ }
+ }
+ }
+ }
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // Altivec Vector compare instructions do not set any CR register by default and
+ // vector compare operations return the same type as the operands.
+ if (LHS.getValueType().isVector()) {
+ EVT VecVT = LHS.getValueType();
+ bool Swap, Negate;
+ unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+ PPCSubTarget->hasVSX(), Swap, Negate);
+ if (Swap)
+ std::swap(LHS, RHS);
+
+ if (Negate) {
+ SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+ PPC::VNOR,
+ VecVT, VCmp, VCmp);
+ }
+
+ return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
+ }
+
+ if (PPCSubTarget->useCRBits())
+ return nullptr;
+
+ bool Inv;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
+ SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
+ SDValue IntCR;
+
+ // Force the ccreg into CR7.
+ SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
+
+ SDValue InFlag(nullptr, 0); // Null incoming flag value.
+ CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
+ InFlag).getValue(1);
+
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
+
+ SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
+ getI32Imm(31), getI32Imm(31) };
+ if (!Inv)
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+
+ // Get the specified bit.
+ SDValue Tmp =
+ SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
+}
+
+
+// Select - Convert the specified operand from a target-independent to a
+// target-specific node if it hasn't already been changed.
+SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
+ SDLoc dl(N);
+ if (N->isMachineOpcode()) {
+ N->setNodeId(-1);
+ return nullptr; // Already selected.
+ }
+
+ switch (N->getOpcode()) {
+ default: break;
+
+ case ISD::Constant: {
+ if (N->getValueType(0) == MVT::i64) {
+ // Get 64 bit value.
+ int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
+ // Assume no remaining bits.
+ unsigned Remainder = 0;
+ // Assume no shift required.
+ unsigned Shift = 0;
+
+ // If it can't be represented as a 32 bit value.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ // If the shifted value fits 32 bits.
+ if (isInt<32>(ImmSh)) {
+ // Go with the shifted value.
+ Imm = ImmSh;
+ } else {
+ // Still stuck with a 64 bit value.
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Intermediate operand.
+ SDNode *Result;
+
+ // Handle first 32 bits.
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ // Simple value.
+ if (isInt<16>(Imm)) {
+ // Just the Lo bits.
+ Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
+ } else if (Lo) {
+ // Handle the Hi bits.
+ unsigned OpC = Hi ? PPC::LIS8 : PPC::LI8;
+ Result = CurDAG->getMachineNode(OpC, dl, MVT::i64, getI32Imm(Hi));
+ // And Lo bits.
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ } else {
+ // Just the Hi bits.
+ Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi));
+ }
+
+ // If no shift, we're done.
+ if (!Shift) return Result;
+
+ // Shift for next step if the upper 32-bits were not zero.
+ if (Imm) {
+ Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
+ SDValue(Result, 0),
+ getI32Imm(Shift),
+ getI32Imm(63 - Shift));
+ }
+
+ // Add in the last bits as required.
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Hi));
+ }
+ if ((Lo = Remainder & 0xFFFF)) {
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo));
+ }
+
+ return Result;
+ }
+ break;
+ }
+
+ case ISD::SETCC: {
+ SDNode *SN = SelectSETCC(N);
+ if (SN)
+ return SN;
+ break;
+ }
+ case PPCISD::GlobalBaseReg:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, Opc, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ return CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
+ getSmallIPtrImm(0));
+ }
+
+ case PPCISD::MFOCRF: {
+ SDValue InFlag = N->getOperand(1);
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
+ }
+
+ case ISD::SDIV: {
+ // FIXME: since this depends on the setting of the carry flag from the srawi
+ // we should really be making notes about that for the scheduler.
+ // FIXME: It sure would be nice if we could cheaply recognize the
+ // srl/add/sra pattern the dag combiner will generate for this as
+ // sra/addze rather than having to handle sdiv ourselves. oh well.
+ unsigned Imm;
+ if (isInt32Immediate(N->getOperand(1), Imm)) {
+ SDValue N0 = N->getOperand(0);
+ if ((signed)Imm > 0 && isPowerOf2_32(Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(Imm)));
+ return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1));
+ } else if ((signed)Imm < 0 && isPowerOf2_32(-Imm)) {
+ SDNode *Op =
+ CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
+ N0, getI32Imm(Log2_32(-Imm)));
+ SDValue PT =
+ SDValue(CurDAG->getMachineNode(PPC::ADDZE, dl, MVT::i32,
+ SDValue(Op, 0), SDValue(Op, 1)),
+ 0);
+ return CurDAG->SelectNodeTo(N, PPC::NEG, MVT::i32, PT);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ISD::LOAD: {
+ // Handle preincrement loads.
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT LoadedVT = LD->getMemoryVT();
+
+ // Normal loads are handled by code generated from the .td file.
+ if (LD->getAddressingMode() != ISD::PRE_INC)
+ break;
+
+ SDValue Offset = LD->getOffset();
+ if (Offset.getOpcode() == ISD::TargetConstant ||
+ Offset.getOpcode() == ISD::TargetGlobalAddress) {
+
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDU; break;
+ case MVT::f32: Opcode = PPC::LFSU; break;
+ case MVT::i32: Opcode = PPC::LWZU; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDU; break;
+ case MVT::i32: Opcode = PPC::LWZU8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZU8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Offset, Base, Chain };
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops);
+ } else {
+ unsigned Opcode;
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
+ if (LD->getValueType(0) != MVT::i64) {
+ // Handle PPC32 integer and normal FP loads.
+ assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::f64: Opcode = PPC::LFDUX; break;
+ case MVT::f32: Opcode = PPC::LFSUX; break;
+ case MVT::i32: Opcode = PPC::LWZUX; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX; break;
+ }
+ } else {
+ assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
+ assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
+ "Invalid sext update load");
+ switch (LoadedVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid PPC load type!");
+ case MVT::i64: Opcode = PPC::LDUX; break;
+ case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
+ case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
+ case MVT::i1:
+ case MVT::i8: Opcode = PPC::LBZUX8; break;
+ }
+ }
+
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ SDValue Ops[] = { Base, Offset, Chain };
+ return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
+ PPCLowering->getPointerTy(),
+ MVT::Other, Ops);
+ }
+ }
+
+ case ISD::AND: {
+ unsigned Imm, Imm2, SH, MB, ME;
+ uint64_t Imm64;
+
+ // If this is an and of a value rotated between 0 and 31 bits and then and'd
+ // with a mask, emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ // If this is just a masked value where the input is not handled above, and
+ // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ isRunOfOnes(Imm, MB, ME) &&
+ N->getOperand(0).getOpcode() != ISD::ROTL) {
+ SDValue Val = N->getOperand(0);
+ SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+ // If this is a 64-bit zero-extension mask, emit rldicl.
+ if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
+ isMask_64(Imm64)) {
+ SDValue Val = N->getOperand(0);
+ MB = 64 - CountTrailingOnes_64(Imm64);
+ SH = 0;
+
+ // If the operand is a logical right shift, we can fold it into this
+ // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
+ // for n <= mb. The right shift is really a left rotate followed by a
+ // mask, and this mask is a more-restrictive sub-mask of the mask implied
+ // by the shift.
+ if (Val.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
+ assert(Imm < 64 && "Illegal shift amount");
+ Val = Val.getOperand(0);
+ SH = 64 - Imm;
+ }
+
+ SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) };
+ return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
+ }
+ // AND X, 0 -> 0, not "rlwinm 32".
+ if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
+ ReplaceUses(SDValue(N, 0), N->getOperand(1));
+ return nullptr;
+ }
+ // ISD::OR doesn't get all the bitfield insertion fun.
+ // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
+ if (isInt32Immediate(N->getOperand(1), Imm) &&
+ N->getOperand(0).getOpcode() == ISD::OR &&
+ isInt32Immediate(N->getOperand(0).getOperand(1), Imm2)) {
+ unsigned MB, ME;
+ Imm = ~(Imm^Imm2);
+ if (isRunOfOnes(Imm, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ N->getOperand(0).getOperand(1),
+ getI32Imm(0), getI32Imm(MB),getI32Imm(ME) };
+ return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
+ }
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::OR:
+ if (N->getValueType(0) == MVT::i32)
+ if (SDNode *I = SelectBitfieldInsert(N))
+ return I;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SHL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::SRL: {
+ unsigned Imm, SH, MB, ME;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
+ isRotateAndMask(N, Imm, true, SH, MB, ME)) {
+ SDValue Ops[] = { N->getOperand(0).getOperand(0),
+ getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
+ }
+
+ // Other cases are autogenerated.
+ break;
+ }
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ case PPCISD::ANDIo_1_EQ_BIT:
+ case PPCISD::ANDIo_1_GT_BIT: {
+ if (!ANDIGlueBug)
+ break;
+
+ EVT InVT = N->getOperand(0).getValueType();
+ assert((InVT == MVT::i64 || InVT == MVT::i32) &&
+ "Invalid input type for ANDIo_1_EQ_BIT");
+
+ unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDIo8 : PPC::ANDIo;
+ SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
+ N->getOperand(0),
+ CurDAG->getTargetConstant(1, InVT)), 0);
+ SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
+ SDValue SRIdxVal =
+ CurDAG->getTargetConstant(N->getOpcode() == PPCISD::ANDIo_1_EQ_BIT ?
+ PPC::sub_eq : PPC::sub_gt, MVT::i32);
+
+ return CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1,
+ CR0Reg, SRIdxVal,
+ SDValue(AndI.getNode(), 1) /* glue */);
+ }
+ case ISD::SELECT_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
+ // If this is a select of i1 operands, we'll pattern match it.
+ if (PPCSubTarget->useCRBits() &&
+ N->getOperand(0).getValueType() == MVT::i1)
+ break;
+
+ // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
+
+ SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
+
+ if (N->getValueType(0) == MVT::i1) {
+ // An i1 select is: (c & t) | (!c & f).
+ bool Inv;
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
+
+ unsigned SRI;
+ switch (Idx) {
+ default: llvm_unreachable("Invalid CC index");
+ case 0: SRI = PPC::sub_lt; break;
+ case 1: SRI = PPC::sub_gt; break;
+ case 2: SRI = PPC::sub_eq; break;
+ case 3: SRI = PPC::sub_un; break;
+ }
+
+ SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
+
+ SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
+ CCBit, CCBit), 0);
+ SDValue C = Inv ? NotCCBit : CCBit,
+ NotC = Inv ? CCBit : NotCCBit;
+
+ SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ C, N->getOperand(2)), 0);
+ SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
+ NotC, N->getOperand(3)), 0);
+
+ return CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
+ }
+
+ unsigned BROpc = getPredicateForSetCC(CC);
+
+ unsigned SelectCCOp;
+ if (N->getValueType(0) == MVT::i32)
+ SelectCCOp = PPC::SELECT_CC_I4;
+ else if (N->getValueType(0) == MVT::i64)
+ SelectCCOp = PPC::SELECT_CC_I8;
+ else if (N->getValueType(0) == MVT::f32)
+ SelectCCOp = PPC::SELECT_CC_F4;
+ else if (N->getValueType(0) == MVT::f64)
+ SelectCCOp = PPC::SELECT_CC_F8;
+ else
+ SelectCCOp = PPC::SELECT_CC_VRRC;
+
+ SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
+ getI32Imm(BROpc) };
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
+ }
+ case ISD::VSELECT:
+ if (PPCSubTarget->hasVSX()) {
+ SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
+ }
+
+ break;
+ case ISD::VECTOR_SHUFFLE:
+ if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
+ N->getValueType(0) == MVT::v2i64)) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+
+ SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
+ Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
+ unsigned DM[2];
+
+ for (int i = 0; i < 2; ++i)
+ if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
+ DM[i] = 0;
+ else
+ DM[i] = 1;
+
+ SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), MVT::i32);
+
+ if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
+ Op1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ isa<LoadSDNode>(Op1.getOperand(0))) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
+ SDValue Base, Offset;
+
+ if (LD->isUnindexed() &&
+ SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
+ SDValue Chain = LD->getChain();
+ SDValue Ops[] = { Base, Offset, Chain };
+ return CurDAG->SelectNodeTo(N, PPC::LXVDSX,
+ N->getValueType(0), Ops);
+ }
+ }
+
+ SDValue Ops[] = { Op1, Op2, DMV };
+ return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
+ }
+
+ break;
+ case PPCISD::BDNZ:
+ case PPCISD::BDZ: {
+ bool IsPPC64 = PPCSubTarget->isPPC64();
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
+ (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
+ MVT::Other, Ops);
+ }
+ case PPCISD::COND_BRANCH: {
+ // Op #0 is the Chain.
+ // Op #1 is the PPC::PRED_* number.
+ // Op #2 is the CR#
+ // Op #3 is the Dest MBB
+ // Op #4 is the Flag.
+ // Prevent PPC::PRED_* from being selected into LI.
+ SDValue Pred =
+ getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+ SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
+ N->getOperand(0), N->getOperand(4) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+ }
+ case ISD::BR_CC: {
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ unsigned PCC = getPredicateForSetCC(CC);
+
+ if (N->getOperand(2).getValueType() == MVT::i1) {
+ unsigned Opc;
+ bool Swap;
+ switch (PCC) {
+ default: llvm_unreachable("Unexpected Boolean-operand predicate");
+ case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
+ case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
+ case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
+ case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
+ case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
+ case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
+ }
+
+ SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
+ N->getOperand(Swap ? 3 : 2),
+ N->getOperand(Swap ? 2 : 3)), 0);
+ return CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other,
+ BitComp, N->getOperand(4), N->getOperand(0));
+ }
+
+ SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
+ SDValue Ops[] = { getI32Imm(PCC), CondCode,
+ N->getOperand(4), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
+ }
+ case ISD::BRIND: {
+ // FIXME: Should custom lower this.
+ SDValue Chain = N->getOperand(0);
+ SDValue Target = N->getOperand(1);
+ unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
+ unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
+ Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
+ Chain), 0);
+ return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
+ }
+ case PPCISD::TOC_ENTRY: {
+ if (PPCSubTarget->isSVR4ABI() && !PPCSubTarget->isPPC64()) {
+ SDValue GA = N->getOperand(0);
+ return CurDAG->getMachineNode(PPC::LWZtoc, dl, MVT::i32, GA,
+ N->getOperand(1));
+ }
+ assert (PPCSubTarget->isPPC64() &&
+ "Only supported for 64-bit ABI and 32-bit SVR4");
+
+ // For medium and large code model, we generate two instructions as
+ // described below. Otherwise we allow SelectCodeCommon to handle this,
+ // selecting one of LDtoc, LDtocJTI, and LDtocCPT.
+ CodeModel::Model CModel = TM.getCodeModel();
+ if (CModel != CodeModel::Medium && CModel != CodeModel::Large)
+ break;
+
+ // The first source operand is a TargetGlobalAddress or a TargetJumpTable.
+ // If it is an externally defined symbol, a symbol with common linkage,
+ // a non-local function address, or a jump table address, or if we are
+ // generating code for large code model, we generate:
+ // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>)
+ SDValue GA = N->getOperand(0);
+ SDValue TOCbase = N->getOperand(1);
+ SDNode *Tmp = CurDAG->getMachineNode(PPC::ADDIStocHA, dl, MVT::i64,
+ TOCbase, GA);
+
+ if (isa<JumpTableSDNode>(GA) || CModel == CodeModel::Large)
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
+ const GlobalValue *GValue = G->getGlobal();
+ if ((GValue->getType()->getElementType()->isFunctionTy() &&
+ (GValue->isDeclaration() || GValue->isWeakForLinker())) ||
+ GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage())
+ return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
+ SDValue(Tmp, 0));
+ }
+
+ return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
+ SDValue(Tmp, 0), GA);
+ }
+ case PPCISD::PPC32_PICGOT: {
+ // Generate a PIC-safe GOT reference.
+ assert(!PPCSubTarget->isPPC64() && PPCSubTarget->isSVR4ABI() &&
+ "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
+ return CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT, PPCLowering->getPointerTy(), MVT::i32);
+ }
+ case PPCISD::VADD_SPLAT: {
+ // This expands into one of three sequences, depending on whether
+ // the first operand is odd or even, positive or negative.
+ assert(isa<ConstantSDNode>(N->getOperand(0)) &&
+ isa<ConstantSDNode>(N->getOperand(1)) &&
+ "Invalid operand on VADD_SPLAT!");
+
+ int Elt = N->getConstantOperandVal(0);
+ int EltSize = N->getConstantOperandVal(1);
+ unsigned Opc1, Opc2, Opc3;
+ EVT VT;
+
+ if (EltSize == 1) {
+ Opc1 = PPC::VSPLTISB;
+ Opc2 = PPC::VADDUBM;
+ Opc3 = PPC::VSUBUBM;
+ VT = MVT::v16i8;
+ } else if (EltSize == 2) {
+ Opc1 = PPC::VSPLTISH;
+ Opc2 = PPC::VADDUHM;
+ Opc3 = PPC::VSUBUHM;
+ VT = MVT::v8i16;
+ } else {
+ assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
+ Opc1 = PPC::VSPLTISW;
+ Opc2 = PPC::VADDUWM;
+ Opc3 = PPC::VSUBUWM;
+ VT = MVT::v4i32;
+ }
+
+ if ((Elt & 1) == 0) {
+ // Elt is even, in the range [-32,-18] + [16,30].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp = VSPLTIS[BHW] elt
+ // VADDU[BHW]M tmp, tmp
+ // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
+ SDValue EltVal = getI32Imm(Elt >> 1);
+ SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ SDValue TmpVal = SDValue(Tmp, 0);
+ return CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal);
+
+ } else if (Elt > 0) {
+ // Elt is odd and positive, in the range [17,31].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt-16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VSUBU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt - 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+
+ } else {
+ // Elt is odd and negative, in the range [-31,-17].
+ //
+ // Convert: VADD_SPLAT elt, size
+ // Into: tmp1 = VSPLTIS[BHW] elt+16
+ // tmp2 = VSPLTIS[BHW] -16
+ // VADDU[BHW]M tmp1, tmp2
+ SDValue EltVal = getI32Imm(Elt + 16);
+ SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ EltVal = getI32Imm(-16);
+ SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
+ return CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
+ SDValue(Tmp2, 0));
+ }
+ }
+ }
+
+ return SelectCode(N);
+}
+
+/// PostprocessISelDAG - Perform some late peephole optimizations
+/// on the DAG representation.
+void PPCDAGToDAGISel::PostprocessISelDAG() {
+
+ // Skip peepholes at -O0.
+ if (TM.getOptLevel() == CodeGenOpt::None)
+ return;
+
+ PeepholePPC64();
+ PeepholeCROps();
+}
+
+// Check if all users of this node will become isel where the second operand
+// is the constant zero. If this is so, and if we can negate the condition,
+// then we can flip the true and false operands. This will allow the zero to
+// be folded with the isel so that we don't need to materialize a register
+// containing zero.
+bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
+ // If we're not using isel, then this does not matter.
+ if (!PPCSubTarget->hasISEL())
+ return false;
+
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (!User->isMachineOpcode())
+ return false;
+ if (User->getMachineOpcode() != PPC::SELECT_I4 &&
+ User->getMachineOpcode() != PPC::SELECT_I8)
+ return false;
+
+ SDNode *Op2 = User->getOperand(2).getNode();
+ if (!Op2->isMachineOpcode())
+ return false;
+
+ if (Op2->getMachineOpcode() != PPC::LI &&
+ Op2->getMachineOpcode() != PPC::LI8)
+ return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op2->getOperand(0));
+ if (!C)
+ return false;
+
+ if (!C->isNullValue())
+ return false;
+ }
+
+ return true;
+}
+
+void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
+ SmallVector<SDNode *, 4> ToReplace;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
+ User->getMachineOpcode() == PPC::SELECT_I8) &&
+ "Must have all select users");
+ ToReplace.push_back(User);
+ }
+
+ for (SmallVector<SDNode *, 4>::iterator UI = ToReplace.begin(),
+ UE = ToReplace.end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ SDNode *ResNode =
+ CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
+ User->getValueType(0), User->getOperand(0),
+ User->getOperand(2),
+ User->getOperand(1));
+
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(User->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(User, ResNode);
+ }
+}
+
+void PPCDAGToDAGISel::PeepholeCROps() {
+ bool IsModified;
+ do {
+ IsModified = false;
+ for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
+ E = CurDAG->allnodes_end(); I != E; ++I) {
+ MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
+ if (!MachineNode || MachineNode->use_empty())
+ continue;
+ SDNode *ResNode = MachineNode;
+
+ bool Op1Set = false, Op1Unset = false,
+ Op1Not = false,
+ Op2Set = false, Op2Unset = false,
+ Op2Not = false;
+
+ unsigned Opcode = MachineNode->getMachineOpcode();
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ case PPC::CRNAND:
+ case PPC::CROR:
+ case PPC::CRXOR:
+ case PPC::CRNOR:
+ case PPC::CREQV:
+ case PPC::CRANDC:
+ case PPC::CRORC: {
+ SDValue Op = MachineNode->getOperand(1);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op2Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op2Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op2Not = true;
+ }
+ } // fallthrough
+ case PPC::BC:
+ case PPC::BCn:
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_VRRC: {
+ SDValue Op = MachineNode->getOperand(0);
+ if (Op.isMachineOpcode()) {
+ if (Op.getMachineOpcode() == PPC::CRSET)
+ Op1Set = true;
+ else if (Op.getMachineOpcode() == PPC::CRUNSET)
+ Op1Unset = true;
+ else if (Op.getMachineOpcode() == PPC::CRNOR &&
+ Op.getOperand(0) == Op.getOperand(1))
+ Op1Not = true;
+ }
+ }
+ break;
+ }
+
+ bool SelectSwap = false;
+ switch (Opcode) {
+ default: break;
+ case PPC::CRAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x & x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set)
+ // 1 & y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // x & 1 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset || Op2Unset)
+ // x & 0 = 0 & y = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // ~x & y = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x & ~y = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNAND:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // nand(x, x) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Set)
+ // nand(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // nand(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset || Op2Unset)
+ // nand(x, 0) = nand(0, y) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Not)
+ // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nand(x, ~y) = ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CROR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // x | x = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Set || Op2Set)
+ // x | 1 = 1 | y = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // 0 | y = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // x | 0 = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // ~x | y = orc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0).
+ getOperand(0));
+ else if (Op2Not)
+ // x | ~y = orc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRXOR:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // xor(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // xor(1, y) -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Set)
+ // xor(x, 1) -> nor(x, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Unset)
+ // xor(0, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Unset)
+ // xor(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // xor(~x, y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // xor(x, ~y) = eqv(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRNOR:
+ if (Op1Set || Op2Set)
+ // nor(1, y) -> 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Unset)
+ // nor(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // nor(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // nor(~x, y) = andc(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // nor(x, ~y) = andc(y, x)
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1).
+ getOperand(0),
+ MachineNode->getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CREQV:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // eqv(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // eqv(1, y) = y
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op2Set)
+ // eqv(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // eqv(0, y) = ~y -> nor(y, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op2Unset)
+ // eqv(x, 0) = ~x
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(0));
+ else if (Op1Not)
+ // eqv(~x, y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // eqv(x, ~y) = xor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1)),
+ SelectSwap = true;
+ break;
+ case PPC::CRANDC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // andc(x, x) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set)
+ // andc(1, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Unset || Op2Set)
+ // andc(0, y) = andc(x, 1) = 0
+ ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Unset)
+ // andc(x, 0) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Not)
+ // andc(~x, y) = ~(x | y) = nor(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // andc(x, ~y) = x & y
+ ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::CRORC:
+ if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
+ // orc(x, x) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op1Set || Op2Unset)
+ // orc(1, y) = orc(x, 0) = 1
+ ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
+ MVT::i1);
+ else if (Op2Set)
+ // orc(x, 1) = x
+ ResNode = MachineNode->getOperand(0).getNode();
+ else if (Op1Unset)
+ // orc(0, y) = ~y
+ ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(1));
+ else if (Op1Not)
+ // orc(~x, y) = ~(x & y) = nand(x, y)
+ ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1));
+ else if (Op2Not)
+ // orc(x, ~y) = x | y
+ ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(0),
+ MachineNode->getOperand(1).
+ getOperand(0));
+ else if (AllUsersSelectZero(MachineNode))
+ ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
+ MVT::i1, MachineNode->getOperand(1),
+ MachineNode->getOperand(0)),
+ SelectSwap = true;
+ break;
+ case PPC::SELECT_I4:
+ case PPC::SELECT_I8:
+ case PPC::SELECT_F4:
+ case PPC::SELECT_F8:
+ case PPC::SELECT_VRRC:
+ if (Op1Set)
+ ResNode = MachineNode->getOperand(1).getNode();
+ else if (Op1Unset)
+ ResNode = MachineNode->getOperand(2).getNode();
+ else if (Op1Not)
+ ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
+ SDLoc(MachineNode),
+ MachineNode->getValueType(0),
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(2),
+ MachineNode->getOperand(1));
+ break;
+ case PPC::BC:
+ case PPC::BCn:
+ if (Op1Not)
+ ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
+ PPC::BC,
+ SDLoc(MachineNode),
+ MVT::Other,
+ MachineNode->getOperand(0).
+ getOperand(0),
+ MachineNode->getOperand(1),
+ MachineNode->getOperand(2));
+ // FIXME: Handle Op1Set, Op1Unset here too.
+ break;
+ }
+
+ // If we're inverting this node because it is used only by selects that
+ // we'd like to swap, then swap the selects before the node replacement.
+ if (SelectSwap)
+ SwapAllSelectUsers(MachineNode);
+
+ if (ResNode != MachineNode) {
+ DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
+ DEBUG(MachineNode->dump(CurDAG));
+ DEBUG(dbgs() << "\nNew: ");
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ ReplaceUses(MachineNode, ResNode);
+ IsModified = true;
+ }
+ }
+ if (IsModified)
+ CurDAG->RemoveDeadNodes();
+ } while (IsModified);
+}
+
+void PPCDAGToDAGISel::PeepholePPC64() {
+ // These optimizations are currently supported only for 64-bit SVR4.
+ if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
+ return;
+
+ SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
+ ++Position;
+
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = --Position;
+ // Skip dead nodes and any non-machine opcodes.
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
+
+ unsigned FirstOp;
+ unsigned StorageOpcode = N->getMachineOpcode();
+
+ switch (StorageOpcode) {
+ default: continue;
+
+ case PPC::LBZ:
+ case PPC::LBZ8:
+ case PPC::LD:
+ case PPC::LFD:
+ case PPC::LFS:
+ case PPC::LHA:
+ case PPC::LHA8:
+ case PPC::LHZ:
+ case PPC::LHZ8:
+ case PPC::LWA:
+ case PPC::LWZ:
+ case PPC::LWZ8:
+ FirstOp = 0;
+ break;
+
+ case PPC::STB:
+ case PPC::STB8:
+ case PPC::STD:
+ case PPC::STFD:
+ case PPC::STFS:
+ case PPC::STH:
+ case PPC::STH8:
+ case PPC::STW:
+ case PPC::STW8:
+ FirstOp = 1;
+ break;
+ }
+
+ // If this is a load or store with a zero offset, we may be able to
+ // fold an add-immediate into the memory operation.
+ if (!isa<ConstantSDNode>(N->getOperand(FirstOp)) ||
+ N->getConstantOperandVal(FirstOp) != 0)
+ continue;
+
+ SDValue Base = N->getOperand(FirstOp + 1);
+ if (!Base.isMachineOpcode())
+ continue;
+
+ unsigned Flags = 0;
+ bool ReplaceFlags = true;
+
+ // When the feeding operation is an add-immediate of some sort,
+ // determine whether we need to add relocation information to the
+ // target flags on the immediate operand when we fold it into the
+ // load instruction.
+ //
+ // For something like ADDItocL, the relocation information is
+ // inferred from the opcode; when we process it in the AsmPrinter,
+ // we add the necessary relocation there. A load, though, can receive
+ // relocation from various flavors of ADDIxxx, so we need to carry
+ // the relocation information in the target flags.
+ switch (Base.getMachineOpcode()) {
+ default: continue;
+
+ case PPC::ADDI8:
+ case PPC::ADDI:
+ // In some cases (such as TLS) the relocation information
+ // is already in place on the operand, so copying the operand
+ // is sufficient.
+ ReplaceFlags = false;
+ // For these cases, the immediate may not be divisible by 4, in
+ // which case the fold is illegal for DS-form instructions. (The
+ // other cases provide aligned addresses and are always safe.)
+ if ((StorageOpcode == PPC::LWA ||
+ StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) &&
+ (!isa<ConstantSDNode>(Base.getOperand(1)) ||
+ Base.getConstantOperandVal(1) % 4 != 0))
+ continue;
+ break;
+ case PPC::ADDIdtprelL:
+ Flags = PPCII::MO_DTPREL_LO;
+ break;
+ case PPC::ADDItlsldL:
+ Flags = PPCII::MO_TLSLD_LO;
+ break;
+ case PPC::ADDItocL:
+ Flags = PPCII::MO_TOC_LO;
+ break;
+ }
+
+ // We found an opportunity. Reverse the operands from the add
+ // immediate and substitute them into the load or store. If
+ // needed, update the target flags for the immediate operand to
+ // reflect the necessary relocation information.
+ DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
+ DEBUG(Base->dump(CurDAG));
+ DEBUG(dbgs() << "\nN: ");
+ DEBUG(N->dump(CurDAG));
+ DEBUG(dbgs() << "\n");
+
+ SDValue ImmOpnd = Base.getOperand(1);
+
+ // If the relocation information isn't already present on the
+ // immediate operand, add it now.
+ if (ReplaceFlags) {
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
+ SDLoc dl(GA);
+ const GlobalValue *GV = GA->getGlobal();
+ // We can't perform this optimization for data whose alignment
+ // is insufficient for the instruction encoding.
+ if (GV->getAlignment() < 4 &&
+ (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
+ StorageOpcode == PPC::LWA)) {
+ DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
+ continue;
+ }
+ ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
+ } else if (ConstantPoolSDNode *CP =
+ dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
+ const Constant *C = CP->getConstVal();
+ ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64,
+ CP->getAlignment(),
+ 0, Flags);
+ }
+ }
+
+ if (FirstOp == 1) // Store
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
+ else // Load
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
+
+ // The add-immediate may now be dead, in which case remove it.
+ if (Base.getNode()->use_empty())
+ CurDAG->RemoveDeadNode(Base.getNode());
+ }
+}
+
+
+/// createPPCISelDag - This pass converts a legalized DAG into a
+/// PowerPC-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
+ return new PPCDAGToDAGISel(TM);
+}
+
+static void initializePassOnce(PassRegistry &Registry) {
+ const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID,
+ nullptr, false, false);
+ Registry.registerPass(*PI, true);
+}
+
+void llvm::initializePPCDAGToDAGISelPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializePassOnce);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
new file mode 100644
index 000000000000..1247e860f733
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -0,0 +1,9290 @@
+//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCISelLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCISelLowering.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCPerfectShuffle.h"
+#include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
+cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
+cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
+
+static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
+cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
+
+// FIXME: Remove this once the bug has been fixed!
+extern cl::opt<bool> ANDIGlueBug;
+
+static TargetLoweringObjectFile *createTLOF(const Triple &TT) {
+ // If it isn't a Mach-O file then it's going to be a linux ELF
+ // object file.
+ if (TT.isOSDarwin())
+ return new TargetLoweringObjectFileMachO();
+
+ return new PPC64LinuxTargetObjectFile();
+}
+
+PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
+ : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))),
+ Subtarget(*TM.getSubtargetImpl()) {
+ setPow2DivIsCheap();
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
+ // arguments are at least 4/8 bytes aligned.
+ bool isPPC64 = Subtarget.isPPC64();
+ setMinStackArgumentAlignment(isPPC64 ? 8:4);
+
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
+ addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
+ addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
+
+ // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // PowerPC has pre-inc load and store's.
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
+ setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
+
+ if (Subtarget.useCRBits()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ if (isPPC64 || Subtarget.hasFPCVT()) {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
+ AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ } else {
+ setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
+ }
+
+ // PowerPC does not support direct load / store of condition registers
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+ setOperationAction(ISD::STORE, MVT::i1, Custom);
+
+ // FIXME: Remove this once the ANDI glue bug is fixed:
+ if (ANDIGlueBug)
+ setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
+
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setTruncStoreAction(MVT::i64, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i1, Expand);
+ setTruncStoreAction(MVT::i8, MVT::i1, Expand);
+
+ addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
+ }
+
+ // This is used in the ppcf128->int sequence. Note it has different semantics
+ // from FP_ROUND: that rounds to nearest, this rounds to zero.
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);
+
+ // We do not currently implement these libm ops for PowerPC.
+ setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
+ setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
+
+ // PowerPC has no SREM/UREM instructions
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
+ // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+
+ // We don't support sin/cos/sqrt/fmod/pow
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Legal);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Legal);
+
+ setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
+
+ // If we're enabling GP optimizations, use hardware square root
+ if (!Subtarget.hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+
+ if (!Subtarget.hasFSQRT() &&
+ !(TM.Options.UnsafeFPMath &&
+ Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ if (Subtarget.hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
+
+ if (Subtarget.hasFPRND()) {
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ }
+
+ // PowerPC does not have BSWAP, CTPOP or CTTZ
+ setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+
+ if (Subtarget.hasPOPCNTD()) {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64 , Expand);
+ }
+
+ // PowerPC does not have ROTR
+ setOperationAction(ISD::ROTR, MVT::i32 , Expand);
+ setOperationAction(ISD::ROTR, MVT::i64 , Expand);
+
+ if (!Subtarget.useCRBits()) {
+ // PowerPC does not have Select
+ setOperationAction(ISD::SELECT, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT, MVT::f64, Expand);
+ }
+
+ // PowerPC wants to turn select_cc of FP into fsel when possible.
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+
+ // PowerPC wants to optimize integer setcc a bit
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+
+ // PowerPC does not have BRCOND which requires SetCC
+ if (!Subtarget.useCRBits())
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+ // PowerPC does not have [U|S]INT_TO_FP
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
+ // SjLj exception handling but a light-weight setjmp/longjmp replacement to
+ // support continuation, user-level threading, and etc.. As a result, no
+ // other SjLj exception interfaces are implemented and please don't build
+ // your own exception handling based on them.
+ // LLVM/Clang supports zero-cost DWARF exception handling.
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i64, Custom);
+
+ // TRAP is legal.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+ // TRAMPOLINE is custom lowered.
+ setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
+ setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ if (Subtarget.isSVR4ABI()) {
+ if (isPPC64) {
+ // VAARG always uses double-word chunks, so promote anything smaller.
+ setOperationAction(ISD::VAARG, MVT::i1, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i8, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i16, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::i32, Promote);
+ AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ } else {
+ // VAARG is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ }
+ } else
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+
+ if (Subtarget.isSVR4ABI() && !isPPC64)
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
+
+ // We want to custom lower some of our intrinsics.
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+
+ // To handle counter-based loop conditions.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+
+ // Comparisons that require checking two conditions.
+ setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
+
+ if (Subtarget.has64BitSupport()) {
+ // They also have instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
+ // This is just the low 32 bits of a (signed) fp->i64 conversion.
+ // We cannot do this with Promote because i64 is not a legal type.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+
+ if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ } else {
+ // PowerPC does not have FP_TO_UINT on 32-bit implementations.
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ }
+
+ // With the instructions enabled under FPCVT, we can do everything.
+ if (Subtarget.hasFPCVT()) {
+ if (Subtarget.has64BitSupport()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ }
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ }
+
+ if (Subtarget.use64BitRegs()) {
+ // 64-bit PowerPC implementations can support i64 types directly
+ addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+ // 64-bit PowerPC wants to expand i128 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ } else {
+ // 32-bit PowerPC wants to expand i64 shifts itself.
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
+ }
+
+ if (Subtarget.hasAltivec()) {
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD , VT, Legal);
+ setOperationAction(ISD::SUB , VT, Legal);
+
+ // We promote all shuffles to v16i8.
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
+ AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
+
+ // We promote all non-typed operations to v4i32.
+ setOperationAction(ISD::AND , VT, Promote);
+ AddPromotedToType (ISD::AND , VT, MVT::v4i32);
+ setOperationAction(ISD::OR , VT, Promote);
+ AddPromotedToType (ISD::OR , VT, MVT::v4i32);
+ setOperationAction(ISD::XOR , VT, Promote);
+ AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
+ setOperationAction(ISD::LOAD , VT, Promote);
+ AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
+ setOperationAction(ISD::STORE, VT, Promote);
+ AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
+
+ // No other operations are legal.
+ setOperationAction(ISD::MUL , VT, Expand);
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+ setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FPOWI, VT, Expand);
+ setOperationAction(ISD::FFLOOR, VT, Expand);
+ setOperationAction(ISD::FCEIL, VT, Expand);
+ setOperationAction(ISD::FTRUNC, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UDIVREM, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, InnerVT, Expand);
+ }
+ setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT, Expand);
+ }
+
+ // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
+ // with merges, splats, etc.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::AND , MVT::v4i32, Legal);
+ setOperationAction(ISD::OR , MVT::v4i32, Legal);
+ setOperationAction(ISD::XOR , MVT::v4i32, Legal);
+ setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::v4i32,
+ Subtarget.useCRBits() ? Legal : Expand);
+ setOperationAction(ISD::STORE , MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
+
+ addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
+ addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
+
+ setOperationAction(ISD::MUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::FMA, MVT::v4f32, Legal);
+
+ if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ }
+
+ setOperationAction(ISD::MUL, MVT::v4i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v8i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v16i8, Custom);
+
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
+
+ // Altivec does not contain unordered floating-point compare instructions
+ setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
+
+ if (Subtarget.hasVSX()) {
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
+
+ setOperationAction(ISD::MUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::FMA, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
+
+ // Share the Altivec comparison restrictions.
+ setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STORE, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
+
+ addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
+
+ addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
+ addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
+
+ // VSX v2i64 only supports non-arithmetic operations.
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SUB, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRA, MVT::v2i64, Expand);
+ setOperationAction(ISD::SRL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
+
+ setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
+ setOperationAction(ISD::STORE, MVT::v2i64, Promote);
+ AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
+
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
+ // Vector operation legalization checks the result type of
+ // SIGN_EXTEND_INREG, overall legalization checks the inner type.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+
+ addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
+ }
+ }
+
+ if (Subtarget.has64BitSupport()) {
+ setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
+ }
+
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ // Altivec instructions set fields to all zeros or all ones.
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+
+ if (!isPPC64) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
+
+ if (isPPC64) {
+ setStackPointerRegisterToSaveRestore(PPC::X1);
+ setExceptionPointerRegister(PPC::X3);
+ setExceptionSelectorRegister(PPC::X4);
+ } else {
+ setStackPointerRegisterToSaveRestore(PPC::R1);
+ setExceptionPointerRegister(PPC::R3);
+ setExceptionSelectorRegister(PPC::R4);
+ }
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::LOAD);
+ setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::BR_CC);
+ if (Subtarget.useCRBits())
+ setTargetDAGCombine(ISD::BRCOND);
+ setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
+
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ if (Subtarget.useCRBits()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::SELECT_CC);
+ }
+
+ // Use reciprocal estimates.
+ if (TM.Options.UnsafeFPMath) {
+ setTargetDAGCombine(ISD::FDIV);
+ setTargetDAGCombine(ISD::FSQRT);
+ }
+
+ // Darwin long double math library functions have $LDBL128 appended.
+ if (Subtarget.isDarwin()) {
+ setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
+ setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
+ setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
+ setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
+ setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
+ setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
+ setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
+ setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
+ setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
+ setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
+ }
+
+ // With 32 condition bits, we don't need to sink (and duplicate) compares
+ // aggressively in CodeGenPrep.
+ if (Subtarget.useCRBits())
+ setHasMultipleConditionRegisters();
+
+ setMinFunctionAlignment(2);
+ if (Subtarget.isDarwin())
+ setPrefFunctionAlignment(4);
+
+ if (isPPC64 && Subtarget.isJITCodeModel())
+ // Temporary workaround for the inability of PPC64 JIT to handle jump
+ // tables.
+ setSupportJumpTables(false);
+
+ setInsertFencesForAtomic(true);
+
+ if (Subtarget.enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
+
+ computeRegisterProperties();
+
+ // The Freescale cores does better with aggressive inlining of memcpy and
+ // friends. Gcc uses same threshold of 128 bytes (= 32 word stores).
+ if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
+ Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
+ MaxStoresPerMemset = 32;
+ MaxStoresPerMemsetOptSize = 16;
+ MaxStoresPerMemcpy = 32;
+ MaxStoresPerMemcpyOptSize = 8;
+ MaxStoresPerMemmove = 32;
+ MaxStoresPerMemmoveOptSize = 8;
+
+ setPrefFunctionAlignment(4);
+ }
+}
+
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
+/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+/// function arguments in the caller parameter area.
+unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
+ // Darwin passes everything on 4 byte boundary.
+ if (Subtarget.isDarwin())
+ return 4;
+
+ // 16byte and wider vectors are passed on 16byte boundary.
+ // The rest is 8 on PPC64 and 4 on PPC32 boundary.
+ unsigned Align = Subtarget.isPPC64() ? 8 : 4;
+ if (Subtarget.hasAltivec() || Subtarget.hasQPX())
+ getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
+ return Align;
+}
+
+const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return nullptr;
+ case PPCISD::FSEL: return "PPCISD::FSEL";
+ case PPCISD::FCFID: return "PPCISD::FCFID";
+ case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
+ case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
+ case PPCISD::FRE: return "PPCISD::FRE";
+ case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
+ case PPCISD::STFIWX: return "PPCISD::STFIWX";
+ case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
+ case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
+ case PPCISD::VPERM: return "PPCISD::VPERM";
+ case PPCISD::Hi: return "PPCISD::Hi";
+ case PPCISD::Lo: return "PPCISD::Lo";
+ case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
+ case PPCISD::LOAD: return "PPCISD::LOAD";
+ case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
+ case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
+ case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
+ case PPCISD::SRL: return "PPCISD::SRL";
+ case PPCISD::SRA: return "PPCISD::SRA";
+ case PPCISD::SHL: return "PPCISD::SHL";
+ case PPCISD::CALL: return "PPCISD::CALL";
+ case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
+ case PPCISD::MTCTR: return "PPCISD::MTCTR";
+ case PPCISD::BCTRL: return "PPCISD::BCTRL";
+ case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
+ case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
+ case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
+ case PPCISD::VCMP: return "PPCISD::VCMP";
+ case PPCISD::VCMPo: return "PPCISD::VCMPo";
+ case PPCISD::LBRX: return "PPCISD::LBRX";
+ case PPCISD::STBRX: return "PPCISD::STBRX";
+ case PPCISD::LARX: return "PPCISD::LARX";
+ case PPCISD::STCX: return "PPCISD::STCX";
+ case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::BDNZ: return "PPCISD::BDNZ";
+ case PPCISD::BDZ: return "PPCISD::BDZ";
+ case PPCISD::MFFS: return "PPCISD::MFFS";
+ case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
+ case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
+ case PPCISD::CR6SET: return "PPCISD::CR6SET";
+ case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
+ case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
+ case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
+ case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
+ case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
+ case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
+ case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
+ case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
+ case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
+ case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
+ case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
+ case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
+ case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
+ case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
+ case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
+ case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
+ case PPCISD::SC: return "PPCISD::SC";
+ }
+}
+
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
+
+//===----------------------------------------------------------------------===//
+// Node matching predicates, for use by the tblgen matching code.
+//===----------------------------------------------------------------------===//
+
+/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
+static bool isFloatingPointZero(SDValue Op) {
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+ return CFP->getValueAPF().isZero();
+ else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+ // Maybe this has already been legalized into the constant pool?
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+ return CFP->getValueAPF().isZero();
+ }
+ return false;
+}
+
+/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
+/// true if Op is undef or if it matches the specified value.
+static bool isConstantOrUndef(int Op, int Val) {
+ return Op < 0 || Op == Val;
+}
+
+/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUHUM instruction.
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operantion with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (ShuffleKind == 0) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
+ return false;
+ } else if (ShuffleKind == 2) {
+ if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1;
+ for (unsigned i = 0; i != 8; ++i)
+ if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
+ return false;
+ }
+ return true;
+}
+
+/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+/// VPKUWUM instruction.
+/// The ShuffleKind distinguishes between big-endian operations with
+/// two different inputs (0), either-endian operations with two identical
+/// inputs (1), and little-endian operantion with two different inputs (2).
+/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (ShuffleKind == 0) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
+ return false;
+ } else if (ShuffleKind == 2) {
+ if (!DAG.getTarget().getDataLayout()->isLittleEndian())
+ return false;
+ for (unsigned i = 0; i != 16; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
+ return false;
+ } else if (ShuffleKind == 1) {
+ unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 2;
+ for (unsigned i = 0; i != 8; i += 2)
+ if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
+ !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
+ !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
+ return false;
+ }
+ return true;
+}
+
+/// isVMerge - Common function, used to match vmrg* shuffles.
+///
+static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned LHSStart, unsigned RHSStart) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return false;
+ assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
+ "Unsupported merge size!");
+
+ for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
+ for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
+ if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
+ LHSStart+j+i*UnitSize) ||
+ !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
+ RHSStart+j+i*UnitSize))
+ return false;
+ }
+ return true;
+}
+
+/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ }
+}
+
+/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
+/// The ShuffleKind distinguishes between big-endian merges with two
+/// different inputs (0), either-endian merges with two identical inputs (1),
+/// and little-endian merges with two different inputs (2). For the latter,
+/// the input operands are swapped (see PPCInstrAltivec.td).
+bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG) {
+ if (DAG.getTarget().getDataLayout()->isLittleEndian()) {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 8, 8);
+ else if (ShuffleKind == 2) // swapped
+ return isVMerge(N, UnitSize, 8, 24);
+ else
+ return false;
+ } else {
+ if (ShuffleKind == 1) // unary
+ return isVMerge(N, UnitSize, 0, 0);
+ else if (ShuffleKind == 0) // normal
+ return isVMerge(N, UnitSize, 0, 16);
+ else
+ return false;
+ }
+}
+
+
+/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
+/// amount, otherwise return -1.
+/// The ShuffleKind distinguishes between big-endian operations with two
+/// different inputs (0), either-endian operations with two identical inputs
+/// (1), and little-endian operations with two different inputs (2). For the
+/// latter, the input operands are swapped (see PPCInstrAltivec.td).
+int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::v16i8)
+ return -1;
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+
+ // Find the first non-undef value in the shuffle mask.
+ unsigned i;
+ for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
+ /*search*/;
+
+ if (i == 16) return -1; // all undef.
+
+ // Otherwise, check to see if the rest of the elements are consecutively
+ // numbered from this value.
+ unsigned ShiftAmt = SVOp->getMaskElt(i);
+ if (ShiftAmt < i) return -1;
+
+ ShiftAmt -= i;
+ bool isLE = DAG.getTarget().getDataLayout()->isLittleEndian();
+
+ if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
+ return -1;
+ } else if (ShuffleKind == 1) {
+ // Check the rest of the elements to see if they are consecutive.
+ for (++i; i != 16; ++i)
+ if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
+ return -1;
+ } else
+ return -1;
+
+ if (ShuffleKind == 2 && isLE)
+ ShiftAmt = 16 - ShiftAmt;
+
+ return ShiftAmt;
+}
+
+/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a splat of a single element that is suitable for input to
+/// VSPLTB/VSPLTH/VSPLTW.
+bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
+ assert(N->getValueType(0) == MVT::v16i8 &&
+ (EltSize == 1 || EltSize == 2 || EltSize == 4));
+
+ // This is a splat operation if each element of the permute is the same, and
+ // if the value doesn't reference the second vector.
+ unsigned ElementBase = N->getMaskElt(0);
+
+ // FIXME: Handle UNDEF elements too!
+ if (ElementBase >= 16)
+ return false;
+
+ // Check that the indices are consecutive, in the case of a multi-byte element
+ // splatted with a v16i8 mask.
+ for (unsigned i = 1; i != EltSize; ++i)
+ if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
+ return false;
+
+ for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
+ if (N->getMaskElt(i) < 0) continue;
+ for (unsigned j = 0; j != EltSize; ++j)
+ if (N->getMaskElt(i+j) != N->getMaskElt(j))
+ return false;
+ }
+ return true;
+}
+
+/// isAllNegativeZeroVector - Returns true if all elements of build_vector
+/// are -0.0.
+bool PPC::isAllNegativeZeroVector(SDNode *N) {
+ BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);
+
+ APInt APVal, APUndef;
+ unsigned BitSize;
+ bool HasAnyUndefs;
+
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
+ if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
+ return CFP->getValueAPF().isNegZero();
+
+ return false;
+}
+
+/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
+ SelectionDAG &DAG) {
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ assert(isSplatShuffleMask(SVOp, EltSize));
+ if (DAG.getTarget().getDataLayout()->isLittleEndian())
+ return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
+ else
+ return SVOp->getMaskElt(0) / EltSize;
+}
+
+/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
+/// by using a vspltis[bhw] instruction of the specified element size, return
+/// the constant being splatted. The ByteSize field indicates the number of
+/// bytes of each element [124] -> [bhw].
+SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+ SDValue OpVal(nullptr, 0);
+
+ // If ByteSize of the splat is bigger than the element size of the
+ // build_vector, then we have a case where we are checking for a splat where
+ // multiple elements of the buildvector are folded together into a single
+ // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
+ unsigned EltSize = 16/N->getNumOperands();
+ if (EltSize < ByteSize) {
+ unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
+ SDValue UniquedVals[4];
+ assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
+
+ // See if all of the elements in the buildvector agree across.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ // If the element isn't a constant, bail fully out.
+ if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
+
+
+ if (!UniquedVals[i&(Multiple-1)].getNode())
+ UniquedVals[i&(Multiple-1)] = N->getOperand(i);
+ else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
+ return SDValue(); // no match.
+ }
+
+ // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
+ // either constant or undef values that are identical for each chunk. See
+ // if these chunks can form into a larger vspltis*.
+
+ // Check to see if all of the leading entries are either 0 or -1. If
+ // neither, then this won't fit into the immediate field.
+ bool LeadingZero = true;
+ bool LeadingOnes = true;
+ for (unsigned i = 0; i != Multiple-1; ++i) {
+ if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
+
+ LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
+ LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
+ }
+ // Finally, check the least significant entry.
+ if (LeadingZero) {
+ if (!UniquedVals[Multiple-1].getNode())
+ return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
+ int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
+ if (Val < 16)
+ return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
+ }
+ if (LeadingOnes) {
+ if (!UniquedVals[Multiple-1].getNode())
+ return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
+ int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
+ if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
+ return DAG.getTargetConstant(Val, MVT::i32);
+ }
+
+ return SDValue();
+ }
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (!OpVal.getNode())
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return SDValue();
+ }
+
+ if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
+
+ unsigned ValSizeInBytes = EltSize;
+ uint64_t Value = 0;
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ Value = CN->getZExtValue();
+ } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
+ assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
+ Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ }
+
+ // If the splat value is larger than the element value, then we can never do
+ // this splat. The only case that we could fit the replicated bits into our
+ // immediate field for would be zero, and we prefer to use vxor for it.
+ if (ValSizeInBytes < ByteSize) return SDValue();
+
+ // If the element value is larger than the splat value, cut it in half and
+ // check to see if the two halves are equal. Continue doing this until we
+ // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
+ while (ValSizeInBytes > ByteSize) {
+ ValSizeInBytes >>= 1;
+
+ // If the top half equals the bottom half, we're still ok.
+ if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
+ (Value & ((1 << (8*ValSizeInBytes))-1)))
+ return SDValue();
+ }
+
+ // Properly sign extend the value.
+ int MaskVal = SignExtend32(Value, ByteSize * 8);
+
+ // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
+ if (MaskVal == 0) return SDValue();
+
+ // Finally, if this value fits in a 5 bit sext field, return it
+ if (SignExtend32<5>(MaskVal) == MaskVal)
+ return DAG.getTargetConstant(MaskVal, MVT::i32);
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Addressing Mode Selection
+//===----------------------------------------------------------------------===//
+
+/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
+/// or 64-bit immediate, and if the value can be accurately represented as a
+/// sign extension from a 16-bit value. If so, this returns true and the
+/// immediate.
+static bool isIntS16Immediate(SDNode *N, short &Imm) {
+ if (!isa<ConstantSDNode>(N))
+ return false;
+
+ Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+static bool isIntS16Immediate(SDValue Op, short &Imm) {
+ return isIntS16Immediate(Op.getNode(), Imm);
+}
+
+
+/// SelectAddressRegReg - Given the specified addressed, check to see if it
+/// can be represented as an indexed [r+r] operation. Returns false if it
+/// can be more efficiently represented with [r+imm].
+bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ short imm = 0;
+ if (N.getOpcode() == ISD::ADD) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i
+ if (N.getOperand(1).getOpcode() == PPCISD::Lo)
+ return false; // r+i
+
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ } else if (N.getOpcode() == ISD::OR) {
+ if (isIntS16Immediate(N.getOperand(1), imm))
+ return false; // r+i can fold it if we can.
+
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are provably
+ // disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ APInt RHSKnownZero, RHSKnownOne;
+ DAG.computeKnownBits(N.getOperand(0),
+ LHSKnownZero, LHSKnownOne);
+
+ if (LHSKnownZero.getBoolValue()) {
+ DAG.computeKnownBits(N.getOperand(1),
+ RHSKnownZero, RHSKnownOne);
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (~(LHSKnownZero | RHSKnownZero) == 0) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+ // FIXME: This does not handle the LWA case.
+ if (VT != MVT::i64)
+ return;
+
+ // NOTE: We'll exclude negative FIs here, which come from argument
+ // lowering, because there are no known test cases triggering this problem
+ // using packed structures (or similar). We can remove this exclusion if
+ // we find such a test case. The reason why this is so test-case driven is
+ // because this entire 'fixup' is only to prevent crashes (from the
+ // register scavenger) on not-really-valid inputs. For example, if we have:
+ // %a = alloca i1
+ // %b = bitcast i1* %a to i64*
+ // store i64* a, i64 b
+ // then the store should really be marked as 'align 1', but is not. If it
+ // were marked as 'align 1' then the indexed form would have been
+ // instruction-selected initially, and the problem this 'fixup' is preventing
+ // won't happen regardless.
+ if (FrameIdx < 0)
+ return;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ if (Align >= 4)
+ return;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasNonRISpills();
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 16-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg. If Aligned is true, only accept displacements
+/// suitable for STD and friends, i.e. multiples of 4.
+bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
+ SDValue &Base,
+ SelectionDAG &DAG,
+ bool Aligned) const {
+ // FIXME dl should come from parent load or store, not from address
+ SDLoc dl(N);
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddressRegReg(N, Disp, Base, DAG))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
+ Disp = DAG.getTargetConstant(imm, N.getValueType());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ return true; // [r+i]
+ } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
+ // Match LOAD (ADD (X, Lo(G))).
+ assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
+ && "Cannot handle constant offsets yet!");
+ Disp = N.getOperand(1).getOperand(0); // The global address.
+ assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
+ Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
+ Disp.getOpcode() == ISD::TargetConstantPool ||
+ Disp.getOpcode() == ISD::TargetJumpTable);
+ Base = N.getOperand(0);
+ return true; // [&g+r]
+ }
+ } else if (N.getOpcode() == ISD::OR) {
+ short imm = 0;
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
+ // If this is an or of disjoint bitfields, we can codegen this as an add
+ // (for better address arithmetic) if the LHS and RHS of the OR are
+ // provably disjoint.
+ APInt LHSKnownZero, LHSKnownOne;
+ DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+
+ if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
+ // If all of the bits are known zero on the LHS or RHS, the add won't
+ // carry.
+ if (FrameIndexSDNode *FI =
+ dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+ Disp = DAG.getTargetConstant(imm, N.getValueType());
+ return true;
+ }
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+
+ // If this address fits entirely in a 16-bit sext immediate field, codegen
+ // this as "d, 0"
+ short Imm;
+ if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
+ Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ CN->getValueType(0));
+ return true;
+ }
+
+ // Handle 32-bit sext immediates with LIS + addr mode.
+ if ((CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
+ (!Aligned || (CN->getZExtValue() & 3) == 0)) {
+ int Addr = (int)CN->getZExtValue();
+
+ // Otherwise, break this down into an LIS + disp.
+ Disp = DAG.getTargetConstant((short)Addr, MVT::i32);
+
+ Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
+ unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
+ Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
+ return true;
+ }
+ }
+
+ Disp = DAG.getTargetConstant(0, getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
+ Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else
+ Base = N;
+ return true; // [r+0]
+}
+
+/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
+/// represented as an indexed [r+r] operation.
+bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
+ SDValue &Index,
+ SelectionDAG &DAG) const {
+ // Check to see if we can easily represent this as an [r+r] address. This
+ // will fail if it thinks that the address is more profitably represented as
+ // reg+imm, e.g. where imm = 0.
+ if (SelectAddressRegReg(N, Base, Index, DAG))
+ return true;
+
+ // If the operand is an addition, always emit this as [r+r], since this is
+ // better (for code size, and execution, as the memop does the add for free)
+ // than emitting an explicit add.
+ if (N.getOpcode() == ISD::ADD) {
+ Base = N.getOperand(0);
+ Index = N.getOperand(1);
+ return true;
+ }
+
+ // Otherwise, do it the hard way, using R0 as the base register.
+ Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
+ N.getValueType());
+ Index = N;
+ return true;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ if (DisablePPCPreinc) return false;
+
+ bool isLoad = true;
+ SDValue Ptr;
+ EVT VT;
+ unsigned Alignment;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ Ptr = LD->getBasePtr();
+ VT = LD->getMemoryVT();
+ Alignment = LD->getAlignment();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ Ptr = ST->getBasePtr();
+ VT = ST->getMemoryVT();
+ Alignment = ST->getAlignment();
+ isLoad = false;
+ } else
+ return false;
+
+ // PowerPC doesn't have preinc load/store instructions for vectors.
+ if (VT.isVector())
+ return false;
+
+ if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
+
+ // Common code will reject creating a pre-inc form if the base pointer
+ // is a frame index, or if N is a store and the base pointer is either
+ // the same as or a predecessor of the value being stored. Check for
+ // those situations here, and try with swapped Base/Offset instead.
+ bool Swap = false;
+
+ if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
+ Swap = true;
+ else if (!isLoad) {
+ SDValue Val = cast<StoreSDNode>(N)->getValue();
+ if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
+ Swap = true;
+ }
+
+ if (Swap)
+ std::swap(Base, Offset);
+
+ AM = ISD::PRE_INC;
+ return true;
+ }
+
+ // LDU/STU can only handle immediates that are a multiple of 4.
+ if (VT != MVT::i64) {
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
+ return false;
+ } else {
+ // LDU/STU need an address with at least 4-byte alignment.
+ if (Alignment < 4)
+ return false;
+
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
+ return false;
+ }
+
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
+ // sext i32 to i64 when addr mode is r+i.
+ if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
+ LD->getExtensionType() == ISD::SEXTLOAD &&
+ isa<ConstantSDNode>(Offset))
+ return false;
+ }
+
+ AM = ISD::PRE_INC;
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// GetLabelAccessInfo - Return true if we should reference labels using a
+/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
+static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
+ unsigned &LoOpFlags,
+ const GlobalValue *GV = nullptr) {
+ HiOpFlags = PPCII::MO_HA;
+ LoOpFlags = PPCII::MO_LO;
+
+ // Don't use the pic base if not in PIC relocation model.
+ bool isPIC = TM.getRelocationModel() == Reloc::PIC_;
+
+ if (isPIC) {
+ HiOpFlags |= PPCII::MO_PIC_FLAG;
+ LoOpFlags |= PPCII::MO_PIC_FLAG;
+ }
+
+ // If this is a reference to a global value that requires a non-lazy-ptr, make
+ // sure that instruction lowering adds it.
+ if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
+ HiOpFlags |= PPCII::MO_NLP_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_FLAG;
+
+ if (GV->hasHiddenVisibility()) {
+ HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
+ }
+ }
+
+ return isPIC;
+}
+
+static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
+ SelectionDAG &DAG) {
+ EVT PtrVT = HiPart.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDLoc DL(HiPart);
+
+ SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
+ SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
+
+ // With PIC, the first instruction is actually "GR+hi(&G)".
+ if (isPIC)
+ Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
+
+ // Generate non-pic code that has direct accesses to the constant pool.
+ // The address of the global is just (hi(&g)+lo(&g)).
+ return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
+}
+
+SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(CP);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
+ SDValue CPIHi =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
+ SDValue CPILo =
+ DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
+ return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ PPCII::MO_PIC_FLAG);
+ SDLoc DL(GA);
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), PtrVT, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
+ }
+
+ SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
+ SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
+ return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
+ SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
+ SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
+ return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
+}
+
+SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+ SDLoc dl(GA);
+ const GlobalValue *GV = GA->getGlobal();
+ EVT PtrVT = getPointerTy();
+ bool is64bit = Subtarget.isPPC64();
+
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
+
+ if (Model == TLSModel::LocalExec) {
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL_HA);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TPREL_LO);
+ SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
+ return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
+ }
+
+ if (Model == TLSModel::InitialExec) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
+ PtrVT, GOTReg, TGA);
+ } else
+ GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
+ SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
+ PtrVT, TGA, GOTPtr);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
+ }
+
+ if (Model == TLSModel::GeneralDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
+ GOTPtr, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl,
+ is64bit ? PPC::X3 : PPC::R3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLS_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr);
+ return DAG.getCopyFromReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, PtrVT);
+ }
+
+ if (Model == TLSModel::LocalDynamic) {
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue GOTPtr;
+ if (is64bit) {
+ SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
+ GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
+ GOTReg, TGA);
+ } else {
+ GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
+ }
+ SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
+ GOTPtr, TGA);
+
+ // We need a chain node, and don't have one handy. The underlying
+ // call has no side effects, so using the function entry node
+ // suffices.
+ SDValue Chain = DAG.getEntryNode();
+ Chain = DAG.getCopyToReg(Chain, dl,
+ is64bit ? PPC::X3 : PPC::R3, GOTEntry);
+ SDValue ParmReg = DAG.getRegister(is64bit ? PPC::X3 : PPC::R3,
+ is64bit ? MVT::i64 : MVT::i32);
+ SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
+ PtrVT, ParmReg, TGA);
+ // The return value from GET_TLSLD_ADDR really is in X3 already, but
+ // some hacks are needed here to tie everything together. The extra
+ // copies dissolve during subsequent transforms.
+ Chain = DAG.getCopyToReg(Chain, dl, is64bit ? PPC::X3 : PPC::R3, TLSAddr);
+ SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
+ Chain, ParmReg, TGA);
+ return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
+ }
+
+ llvm_unreachable("Unknown TLS model!");
+}
+
+SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ SDLoc DL(GSDN);
+ const GlobalValue *GV = GSDN->getGlobal();
+
+ // 64-bit SVR4 ABI code is always position-independent.
+ // The actual address of the GlobalValue is stored in the TOC.
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
+ DAG.getRegister(PPC::X2, MVT::i64));
+ }
+
+ unsigned MOHiFlag, MOLoFlag;
+ bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);
+
+ if (isPIC && Subtarget.isSVR4ABI()) {
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
+ GSDN->getOffset(),
+ PPCII::MO_PIC_FLAG);
+ return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
+ DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
+ }
+
+ SDValue GAHi =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
+ SDValue GALo =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
+
+ SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);
+
+ // If the global reference is actually to a non-lazy-pointer, we have to do an
+ // extra load to get the address of the global.
+ if (MOHiFlag & PPCII::MO_NLP_FLAG)
+ Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
+ false, false, false, 0);
+ return Ptr;
+}
+
+SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc dl(Op);
+
+ if (Op.getValueType() == MVT::v2i64) {
+ // When the operands themselves are v2i64 values, we need to do something
+ // special because VSX has no underlying comparison operations for these.
+ if (Op.getOperand(0).getValueType() == MVT::v2i64) {
+ // Equality can be handled by casting to the legal type for Altivec
+ // comparisons, everything else needs to be expanded.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
+ DAG.getSetCC(dl, MVT::v4i32,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
+ CC));
+ }
+
+ return SDValue();
+ }
+
+ // We handle most of these in the usual way.
+ return Op;
+ }
+
+ // If we're comparing for equality to zero, expose the fact that this is
+ // implented as a ctlz/srl pair on ppc, so that the dag combiner can
+ // fold the new nodes.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ // Leave comparisons against 0 and -1 alone for now, since they're usually
+ // optimized. FIXME: revisit this when we can custom lower all setcc
+ // optimizations.
+ if (C->isAllOnesValue() || C->isNullValue())
+ return SDValue();
+ }
+
+ // If we have an integer seteq/setne, turn it into a compare against zero
+ // by xor'ing the rhs with the lhs, which is faster than setting a
+ // condition register, reading it back out, and masking the correct bit. The
+ // normal approach here uses sub to do this instead of xor. Using xor exposes
+ // the result to other bit-twiddling opportunities.
+ EVT LHSVT = Op.getOperand(0).getValueType();
+ if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ EVT VT = Op.getValueType();
+ SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
+ }
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ SDLoc dl(Node);
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // Check if GprIndex is even
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ // Align GprIndex to be even if it isn't
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
+
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, MVT::i32));
+
+ // fpr
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = FprIndex.getValue(1);
+
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, MVT::i32));
+
+ // areas
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // select overflow_area if index > 8
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+ // adjustment constant gpr_index * 4/8
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ MVT::i32));
+
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 if gpr/fpr > 8
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+ // We have to copy the entire va_list struct:
+ // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
+ return DAG.getMemcpy(Op.getOperand(0), Op,
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, MVT::i32), 8, false, true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Op.getOperand(0);
+}
+
+SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue FPtr = Op.getOperand(2); // nested function
+ SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+ SDLoc dl(Op);
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+ Type *IntPtrTy =
+ DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
+ *DAG.getContext());
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Ty = IntPtrTy;
+ Entry.Node = Trmp; Args.push_back(Entry);
+
+ // TrampSize == (isPPC64 ? 48 : 40);
+ Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
+ isPPC64 ? MVT::i64 : MVT::i32);
+ Args.push_back(Entry);
+
+ Entry.Node = FPtr; Args.push_back(Entry);
+ Entry.Node = Nest; Args.push_back(Entry);
+
+ // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT),
+ std::move(Args), 0);
+
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.second;
+}
+
+SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ SDLoc dl(Op);
+
+ if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
+ // vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+ MachinePointerInfo(SV),
+ false, false, 0);
+ }
+
+ // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
+ // We suppose the given va_list is already allocated.
+ //
+ // typedef struct {
+ // char gpr; /* index into the array of 8 GPRs
+ // * stored in the register save area
+ // * gpr=0 corresponds to r3,
+ // * gpr=1 to r4, etc.
+ // */
+ // char fpr; /* index into the array of 8 FPRs
+ // * stored in the register save area
+ // * fpr=0 corresponds to f1,
+ // * fpr=1 to f2, etc.
+ // */
+ // char *overflow_arg_area;
+ // /* location on stack that holds
+ // * the next overflow argument
+ // */
+ // char *reg_save_area;
+ // /* where r3:r10 and f1:f8 (if saved)
+ // * are stored
+ // */
+ // } va_list[1];
+
+
+ SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
+ SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);
+
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
+ PtrVT);
+ SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
+ PtrVT);
+
+ uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
+ SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);
+
+ uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
+ SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);
+
+ uint64_t FPROffset = 1;
+ SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);
+
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+
+ // Store first byte : number of int regs
+ SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
+ Op.getOperand(1),
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+ uint64_t nextOffset = FPROffset;
+ SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
+ ConstFPROffset);
+
+ // Store second byte : number of float regs
+ SDValue secondStore =
+ DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
+ MachinePointerInfo(SV, nextOffset), MVT::i8,
+ false, false, 0);
+ nextOffset += StackOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
+
+ // Store second word : arguments given on stack
+ SDValue thirdStore =
+ DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+ nextOffset += FrameOffset;
+ nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
+
+ // Store third word : arguments given in registers
+ return DAG.getStore(thirdStore, dl, FR, nextPtr,
+ MachinePointerInfo(SV, nextOffset),
+ false, false, 0);
+
+}
+
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ return true;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // Skip one register if the first unallocated register has an even register
+ // number and there are still argument registers available which have not been
+ // allocated yet. RegNum is actually an index into ArgRegs, which means we
+ // need to skip a register if RegNum is odd.
+ if (RegNum != NumArgRegs && RegNum % 2 == 1) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the first
+ // unallocated register has an odd register number and does not actually
+ // allocate a register for the current argument.
+ return false;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);
+
+ // If there is only one Floating-point register left we need to put both f64
+ // values of a split ppc_fp128 value on the stack.
+ if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
+ State.AllocateReg(ArgRegs[RegNum]);
+ }
+
+ // Always return false here, as this function only makes sure that the two f64
+ // values a ppc_fp128 value is split into are both passed in registers or both
+ // passed on the stack and does not actually allocate a register for the
+ // current argument.
+ return false;
+}
+
+/// GetFPR - Get the set of FP registers that should be allocated for arguments,
+/// on Darwin.
+static const MCPhysReg *GetFPR() {
+ static const MCPhysReg FPR[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
+ };
+
+ return FPR;
+}
+
+/// CalculateStackSlotSize - Calculates the size reserved for this argument on
+/// the stack.
+static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned ArgSize = ArgVT.getStoreSize();
+ if (Flags.isByVal())
+ ArgSize = Flags.getByValSize();
+
+ // Round up to multiples of the pointer size, except for array members,
+ // which are always packed.
+ if (!Flags.isInConsecutiveRegs())
+ ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+
+ return ArgSize;
+}
+
+/// CalculateStackSlotAlignment - Calculates the alignment of this argument
+/// on the stack.
+static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize) {
+ unsigned Align = PtrByteSize;
+
+ // Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ Align = 16;
+
+ // ByVal parameters are aligned as requested.
+ if (Flags.isByVal()) {
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > PtrByteSize) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ Align = BVAlign;
+ }
+ }
+
+ // Array members are always packed to their original alignment.
+ if (Flags.isInConsecutiveRegs()) {
+ // If the array member was split into multiple registers, the first
+ // needs to be aligned to the size of the full type. (Except for
+ // ppcf128, which is only aligned as its f64 components.)
+ if (Flags.isSplit() && OrigVT != MVT::ppcf128)
+ Align = OrigVT.getStoreSize();
+ else
+ Align = ArgVT.getStoreSize();
+ }
+
+ return Align;
+}
+
+/// CalculateStackSlotUsed - Return whether this argument will use its
+/// stack slot (instead of being passed in registers). ArgOffset,
+/// AvailableFPRs, and AvailableVRs must hold the current argument
+/// position, and will be updated to account for this argument.
+static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
+ ISD::ArgFlagsTy Flags,
+ unsigned PtrByteSize,
+ unsigned LinkageSize,
+ unsigned ParamAreaSize,
+ unsigned &ArgOffset,
+ unsigned &AvailableFPRs,
+ unsigned &AvailableVRs) {
+ bool UseMemory = false;
+
+ // Respect alignment of argument on the stack.
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ // If there's no space left in the argument save area, we must
+ // use memory (this check also catches zero-sized arguments).
+ if (ArgOffset >= LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // Allocate argument on the stack.
+ ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // If we overran the argument save area, we must use memory
+ // (this check catches arguments passed partially in memory)
+ if (ArgOffset > LinkageSize + ParamAreaSize)
+ UseMemory = true;
+
+ // However, if the argument is actually passed in an FPR or a VR,
+ // we don't use memory after all.
+ if (!Flags.isByVal()) {
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
+ if (AvailableFPRs > 0) {
+ --AvailableFPRs;
+ return false;
+ }
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
+ if (AvailableVRs > 0) {
+ --AvailableVRs;
+ return false;
+ }
+ }
+
+ return UseMemory;
+}
+
+/// EnsureStackAlignment - Round stack frame size up from NumBytes to
+/// ensure minimum alignment required for target.
+static unsigned EnsureStackAlignment(const TargetMachine &Target,
+ unsigned NumBytes) {
+ unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment();
+ unsigned AlignMask = TargetAlign - 1;
+ NumBytes = (NumBytes + AlignMask) & ~AlignMask;
+ return NumBytes;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
+ return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ } else {
+ return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
+ dl, DAG, InVals);
+ }
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_32SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // 32-bit SVR4 ABI Stack Frame Layout:
+ // +-----------------------------------+
+ // +--> | Back chain |
+ // | +-----------------------------------+
+ // | | Floating-point register save area |
+ // | +-----------------------------------+
+ // | | General register save area |
+ // | +-----------------------------------+
+ // | | CR save word |
+ // | +-----------------------------------+
+ // | | VRSAVE save word |
+ // | +-----------------------------------+
+ // | | Alignment padding |
+ // | +-----------------------------------+
+ // | | Vector register save area |
+ // | +-----------------------------------+
+ // | | Local variable space |
+ // | +-----------------------------------+
+ // | | Parameter list area |
+ // | +-----------------------------------+
+ // | | LR save word |
+ // | +-----------------------------------+
+ // SP--> +--- | Back chain |
+ // +-----------------------------------+
+ //
+ // Specifications:
+ // System V Application Binary Interface PowerPC Processor Supplement
+ // AltiVec Technology Programming Interface Manual
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 4;
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
+ CCInfo.AllocateStack(LinkageSize, PtrByteSize);
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ // Arguments stored in registers.
+ if (VA.isRegLoc()) {
+ const TargetRegisterClass *RC;
+ EVT ValVT = VA.getValVT();
+
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("ValVT not supported by formal arguments Lowering");
+ case MVT::i1:
+ case MVT::i32:
+ RC = &PPC::GPRCRegClass;
+ break;
+ case MVT::f32:
+ RC = &PPC::F4RCRegClass;
+ break;
+ case MVT::f64:
+ if (Subtarget.hasVSX())
+ RC = &PPC::VSFRCRegClass;
+ else
+ RC = &PPC::F8RCRegClass;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ RC = &PPC::VRRCRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v2i64:
+ RC = &PPC::VSHRCRegClass;
+ break;
+ }
+
+ // Transform the arguments stored in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
+ ValVT == MVT::i1 ? MVT::i32 : ValVT);
+
+ if (ValVT == MVT::i1)
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+
+ InVals.push_back(ArgValue);
+ } else {
+ // Argument stored in memory.
+ assert(VA.isMemLoc());
+
+ unsigned ArgSize = VA.getLocVT().getStoreSize();
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
+
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo(),
+ false, false, false, 0));
+ }
+ }
+
+ // Assign locations to all of the incoming aggregate by value arguments.
+ // Aggregates passed by value are stored in the local variable space of the
+ // caller's stack frame, right above the parameter list area.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ MinReservedArea = std::max(MinReservedArea, LinkageSize);
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized function's reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ SmallVector<SDValue, 8> MemOps;
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ static const MCPhysReg GPArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+
+ static const MCPhysReg FPArgRegs[] = {
+ PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
+ PPC::F8
+ };
+ const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+
+ FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
+ NumGPArgRegs));
+ FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
+ NumFPArgRegs));
+
+ // Make room for NumGPArgRegs and NumFPArgRegs.
+ int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
+ NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
+
+ FuncInfo->setVarArgsStackOffset(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
+
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // The fixed integer arguments of a variadic function are stored to the
+ // VarArgsFrameIndex on the stack so that they may be loaded by deferencing
+ // the result of va_next.
+ for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+
+ // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
+ // is set.
+ // The double arguments are stored to the VarArgsFrameIndex
+ // on the stack.
+ for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
+ // Get an existing live-in vreg, or add a new one.
+ unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
+ if (!VReg)
+ VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by eight for the next argument to store
+ SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
+ PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ return Chain;
+}
+
+// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+// value to MVT::i64 and then truncate to the correct register size.
+SDValue
+PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
+ SelectionDAG &DAG, SDValue ArgVal,
+ SDLoc dl) const {
+ if (Flags.isSExt())
+ ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+ else if (Flags.isZExt())
+ ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
+ DAG.getValueType(ObjectVT));
+
+ return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_64SVR4(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = 8;
+
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const MCPhysReg *FPR = GetFPR();
+
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof(VR);
+
+ // Do a first pass over the arguments to determine whether the ABI
+ // guarantees that our caller has allocated the parameter save area
+ // on its stack frame. In the ELFv1 ABI, this is always the case;
+ // in the ELFv2 ABI, it is true if this is a vararg function or if
+ // any parameter is located in a stack slot.
+
+ bool HasParameterArea = !isELFv2ABI || isVarArg;
+ unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
+ unsigned NumBytes = LinkageSize;
+ unsigned AvailableFPRs = Num_FPR_Regs;
+ unsigned AvailableVRs = Num_VR_Regs;
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i)
+ if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
+ PtrByteSize, LinkageSize, ParamAreaSize,
+ NumBytes, AvailableFPRs, AvailableVRs))
+ HasParameterArea = true;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+ SmallVector<SDValue, 8> MemOps;
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ EVT OrigVT = Ins[ArgNo].ArgVT;
+ unsigned ObjSize = ObjectVT.getStoreSize();
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
+
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+ unsigned CurArgOffset = ArgOffset;
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Empty aggregate parameters do not take up registers. Examples:
+ // struct { } a;
+ // union { } b;
+ // int c[0];
+ // etc. However, we have to provide a place-holder in InVals, so
+ // pretend we have an 8-byte item at the current address for that
+ // purpose.
+ if (!ObjSize) {
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ continue;
+ }
+
+ // Create a stack object covering all stack doublewords occupied
+ // by the argument. If the argument is (fully or partially) on
+ // the stack, or if the argument is fully in registers but the
+ // caller has allocated the parameter save anyway, we can refer
+ // directly to the caller's stack frame. Otherwise, create a
+ // local copy in our own frame.
+ int FI;
+ if (HasParameterArea ||
+ ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
+ FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
+ else
+ FI = MFI->CreateStackObject(ArgSize, Align, false);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+
+ // Handle aggregates smaller than 8 bytes.
+ if (ObjSize < PtrByteSize) {
+ // The value of the object is its address, which differs from the
+ // address of the enclosing doubleword on big-endian systems.
+ SDValue Arg = FIN;
+ if (!isLittleEndian) {
+ SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
+ Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
+ }
+ InVals.push_back(Arg);
+
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store;
+
+ if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
+ EVT ObjType = (ObjSize == 1 ? MVT::i8 :
+ (ObjSize == 2 ? MVT::i16 : MVT::i32));
+ Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
+ MachinePointerInfo(FuncArg),
+ ObjType, false, false, 0);
+ } else {
+ // For sizes that don't fit a truncating store (3, 5, 6, 7),
+ // store the whole register as-is to the parameter save area
+ // slot.
+ Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg),
+ false, false, 0);
+ }
+
+ MemOps.push_back(Store);
+ }
+ // Whether we copied from a register or not, advance the offset
+ // into the parameter save area by a full doubleword.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ // The value of the object is its address, which is the address of
+ // its first stack doubleword.
+ InVals.push_back(FIN);
+
+ // Store whatever pieces of the object are in registers to memory.
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ if (GPR_idx == Num_GPR_Regs)
+ break;
+
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Addr = FIN;
+ if (j) {
+ SDValue Off = DAG.getConstant(j, PtrVT);
+ Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
+ }
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
+ MachinePointerInfo(FuncArg, j),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+ ArgOffset += ArgSize;
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // float aggregates.
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
+ &PPC::VSFRCRegClass :
+ &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else if (GPR_idx != Num_GPR_Regs) {
+ // This can only ever happen in the presence of f32 array types,
+ // since otherwise we never run out of FPRs before running out
+ // of GPRs.
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::f32) {
+ if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
+ ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
+ }
+
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
+ } else {
+ needsLoad = true;
+ }
+
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
+ ArgOffset += ArgSize;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
+ MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
+ MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++VR_idx;
+ } else {
+ needsLoad = true;
+ }
+ ArgOffset += 16;
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ if (ObjSize < ArgSize && !isLittleEndian)
+ CurArgOffset += ArgSize - ObjSize;
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ unsigned MinReservedArea;
+ if (HasParameterArea)
+ MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
+ else
+ MinReservedArea = LinkageSize;
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrByteSize, Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing the
+ // result of va_next.
+ for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx < Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::LowerFormalArguments_Darwin(
+ SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // TODO: add description of PPC stack frame format, or at least some docs.
+ //
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ // Potential tail calls could cause overwriting of argument stack slots.
+ bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
+ (CallConv == CallingConv::Fast));
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
+ unsigned ArgOffset = LinkageSize;
+ // Area that is at least reserved in caller of this function.
+ unsigned MinReservedArea = ArgOffset;
+
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+
+ static const MCPhysReg *FPR = GetFPR();
+
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+
+ const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
+ const unsigned Num_FPR_Regs = 13;
+ const unsigned Num_VR_Regs = array_lengthof( VR);
+
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ // In 32-bit non-varargs functions, the stack space for vectors is after the
+ // stack space for non-vectors. We do not use this space unless we have
+ // too many vectors to fit in registers, something that only occurs in
+ // constructed examples:), but we have to walk the arglist to figure
+ // that out...for the pathological case, compute VecArgOffset as the
+ // start of the vector parameter area. Computing VecArgOffset is the
+ // entire point of the following loop.
+ unsigned VecArgOffset = ArgOffset;
+ if (!isVarArg && !isPPC64) {
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
+ ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of regs.
+ unsigned ObjSize = Flags.getByValSize();
+ unsigned ArgSize =
+ ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ VecArgOffset += ArgSize;
+ continue;
+ }
+
+ switch(ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::f32:
+ VecArgOffset += 4;
+ break;
+ case MVT::i64: // PPC64
+ case MVT::f64:
+ // FIXME: We are guaranteed to be !isPPC64 at this point.
+ // Does MVT::i64 apply?
+ VecArgOffset += 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Nothing to do, we're only looking at Nonvector args here.
+ break;
+ }
+ }
+ }
+ // We've found where the vector parameter area in memory is. Skip the
+ // first 12 parameters; these don't use that memory.
+ VecArgOffset = ((VecArgOffset+15)/16)*16;
+ VecArgOffset += 12*16;
+
+ // Add DAG nodes to load the arguments or copy them out of registers. On
+ // entry to a function on PPC, the arguments start after the linkage area,
+ // although the first ones are often in registers.
+
+ SmallVector<SDValue, 8> MemOps;
+ unsigned nAltivecParamsAtEnd = 0;
+ Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ SDValue ArgVal;
+ bool needsLoad = false;
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ unsigned ArgSize = ObjSize;
+ ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
+
+ unsigned CurArgOffset = ArgOffset;
+
+ // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
+ if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
+ ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
+ if (isVarArg || isPPC64) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += CalculateStackSlotSize(ObjectVT,
+ Flags,
+ PtrByteSize);
+ } else nAltivecParamsAtEnd++;
+ } else
+ // Calculate min reserved area.
+ MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
+ Flags,
+ PtrByteSize);
+
+ // FIXME the codegen can be much improved in some cases.
+ // We do not have to keep everything in memory.
+ if (Flags.isByVal()) {
+ // ObjSize is the true size, ArgSize rounded up to multiple of registers.
+ ObjSize = Flags.getByValSize();
+ ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ // Objects of size 1 and 2 are right justified, everything else is
+ // left justified. This means the memory address is adjusted forwards.
+ if (ObjSize==1 || ObjSize==2) {
+ CurArgOffset = CurArgOffset + (4 - ObjSize);
+ }
+ // The value of the object is its address.
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ InVals.push_back(FIN);
+ if (ObjSize==1 || ObjSize==2) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
+ SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg),
+ ObjType, false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ }
+
+ ArgOffset += PtrByteSize;
+
+ continue;
+ }
+ for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
+ // Store whatever pieces of the object are in registers
+ // to memory. ArgOffset will be the address of the beginning
+ // of the object.
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(FuncArg, j),
+ false, false, 0);
+ MemOps.push_back(Store);
+ ++GPR_idx;
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unhandled argument type!");
+ case MVT::i1:
+ case MVT::i32:
+ if (!isPPC64) {
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+
+ if (ObjectVT == MVT::i1)
+ ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += PtrByteSize;
+ break;
+ }
+ // FALLTHROUGH
+ case MVT::i64: // PPC64
+ if (GPR_idx != Num_GPR_Regs) {
+ unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
+
+ if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
+ // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+ // value to MVT::i64 and then truncate to the correct register size.
+ ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
+
+ ++GPR_idx;
+ } else {
+ needsLoad = true;
+ ArgSize = PtrByteSize;
+ }
+ // All int arguments reserve stack space in the Darwin ABI.
+ ArgOffset += 8;
+ break;
+
+ case MVT::f32:
+ case MVT::f64:
+ // Every 4 bytes of argument space consumes one of the GPRs available for
+ // argument passing.
+ if (GPR_idx != Num_GPR_Regs) {
+ ++GPR_idx;
+ if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
+ ++GPR_idx;
+ }
+ if (FPR_idx != Num_FPR_Regs) {
+ unsigned VReg;
+
+ if (ObjectVT == MVT::f32)
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
+ else
+ VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
+
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++FPR_idx;
+ } else {
+ needsLoad = true;
+ }
+
+ // All FP arguments reserve stack space in the Darwin ABI.
+ ArgOffset += isPPC64 ? 8 : ObjSize;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ // Note that vector arguments in registers don't reserve stack space,
+ // except in varargs functions.
+ if (VR_idx != Num_VR_Regs) {
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ if (isVarArg) {
+ while ((ArgOffset % 16) != 0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != Num_GPR_Regs)
+ GPR_idx++;
+ }
+ ArgOffset += 16;
+ GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
+ }
+ ++VR_idx;
+ } else {
+ if (!isVarArg && !isPPC64) {
+ // Vectors go after all the nonvectors.
+ CurArgOffset = VecArgOffset;
+ VecArgOffset += 16;
+ } else {
+ // Vectors are aligned.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ CurArgOffset = ArgOffset;
+ ArgOffset += 16;
+ }
+ needsLoad = true;
+ }
+ break;
+ }
+
+ // We need to load the argument to a virtual register if we determined above
+ // that we ran out of physical registers of the appropriate type.
+ if (needsLoad) {
+ int FI = MFI->CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ }
+
+ InVals.push_back(ArgVal);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ MinReservedArea = ((MinReservedArea+15)/16)*16;
+ MinReservedArea += 16*nAltivecParamsAtEnd;
+ }
+
+ // Area that is at least reserved in the caller of this function.
+ MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
+
+ // Set the size that is at least reserved in caller of this function. Tail
+ // call optimized functions' reserved stack space needs to be aligned so that
+ // taking the difference between two stack areas will result in an aligned
+ // stack.
+ MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
+ FuncInfo->setMinReservedArea(MinReservedArea);
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ int Depth = ArgOffset;
+
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+
+ // If this function is vararg, store any remaining integer argument regs
+ // to their spots on the stack so that they may be loaded by deferencing the
+ // result of va_next.
+ for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
+ unsigned VReg;
+
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
+ SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
+ MachinePointerInfo(), false, false, 0);
+ MemOps.push_back(Store);
+ // Increment the address by four for the next argument to store
+ SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
+ FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
+ }
+ }
+
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
+
+ return Chain;
+}
+
+/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
+/// adjusted to accommodate the arguments for the tailcall.
+static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
+ unsigned ParamSize) {
+
+ if (!isTailCall) return 0;
+
+ PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
+ unsigned CallerMinReservedArea = FI->getMinReservedArea();
+ int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
+ // Remember only if the new adjustement is bigger.
+ if (SPDiff < FI->getTailCallSPDelta())
+ FI->setTailCallSPDelta(SPDiff);
+
+ return SPDiff;
+}
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ if (!getTargetMachine().Options.GuaranteedTailCallOpt)
+ return false;
+
+ // Variable argument functions are not supported.
+ if (isVarArg)
+ return false;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
+ if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
+ // Functions containing by val parameters are not supported.
+ for (unsigned i = 0; i != Ins.size(); i++) {
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (Flags.isByVal()) return false;
+ }
+
+ // Non-PIC/GOT tail calls are supported.
+ if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
+ return true;
+
+ // At the moment we can only do local tail calls (in same module, hidden
+ // or protected) if we are generating PIC.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return G->getGlobal()->hasHiddenVisibility()
+ || G->getGlobal()->hasProtectedVisibility();
+ }
+
+ return false;
+}
+
+/// isCallCompatibleAddress - Return the immediate to use if the specified
+/// 32-bit value is representable in the immediate field of a BxA instruction.
+static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return nullptr;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ SignExtend32<26>(Addr) != Addr)
+ return nullptr; // Top 6 bits have to be sext of immediate.
+
+ return DAG.getConstant((int)C->getZExtValue() >> 2,
+ DAG.getTargetLoweringInfo().getPointerTy()).getNode();
+}
+
+namespace {
+
+struct TailCallArgumentInfo {
+ SDValue Arg;
+ SDValue FrameIdxOp;
+ int FrameIdx;
+
+ TailCallArgumentInfo() : FrameIdx(0) {}
+};
+
+}
+
+/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
+static void
+StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
+ SDValue Chain,
+ const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ SDLoc dl) {
+ for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
+ SDValue Arg = TailCallArgs[i].Arg;
+ SDValue FIN = TailCallArgs[i].FrameIdxOp;
+ int FI = TailCallArgs[i].FrameIdx;
+ // Store relative to framepointer.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ }
+}
+
+/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
+/// the appropriate stack slot for the tail call optimized function call.
+static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
+ MachineFunction &MF,
+ SDValue Chain,
+ SDValue OldRetAddr,
+ SDValue OldFP,
+ int SPDiff,
+ bool isPPC64,
+ bool isDarwinABI,
+ SDLoc dl) {
+ if (SPDiff) {
+ // Calculate the new stack slot for the return address.
+ int SlotSize = isPPC64 ? 8 : 4;
+ int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
+ isDarwinABI);
+ int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
+ Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
+ MachinePointerInfo::getFixedStack(NewRetAddr),
+ false, false, 0);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ int NewFPLoc =
+ SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true);
+ SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
+ Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
+ MachinePointerInfo::getFixedStack(NewFPIdx),
+ false, false, 0);
+ }
+ }
+ return Chain;
+}
+
+/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
+/// the position of the argument.
+static void
+CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
+ SDValue Arg, int SPDiff, unsigned ArgOffset,
+ SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
+ int Offset = ArgOffset + SPDiff;
+ uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
+ SDValue FIN = DAG.getFrameIndex(FI, VT);
+ TailCallArgumentInfo Info;
+ Info.Arg = Arg;
+ Info.FrameIdxOp = FIN;
+ Info.FrameIdx = FI;
+ TailCallArguments.push_back(Info);
+}
+
+/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
+/// stack slot. Returns the chain as result and the loaded frame pointers in
+/// LROpOut/FPOpout. Used when tail calling.
+SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ SDLoc dl) const {
+ if (SPDiff) {
+ // Load the LR and FP stack slot for later adjusting.
+ EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
+ LROpOut = getReturnAddrFrameIndex(DAG);
+ LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
+ false, false, false, 0);
+ Chain = SDValue(LROpOut.getNode(), 1);
+
+ // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
+ // slot as the FP is never overwritten.
+ if (isDarwinABI) {
+ FPOpOut = getFramePointerFrameIndex(DAG);
+ FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
+ false, false, false, 0);
+ Chain = SDValue(FPOpOut.getNode(), 1);
+ }
+ }
+ return Chain;
+}
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" of size "Size". Alignment information is
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+/// Sometimes what we are copying is the end of a larger object, the part that
+/// does not fit in registers.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
+ SDLoc dl) {
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
+ false, false, MachinePointerInfo(),
+ MachinePointerInfo());
+}
+
+/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
+/// tail calls.
+static void
+LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
+ SDValue Arg, SDValue PtrOff, int SPDiff,
+ unsigned ArgOffset, bool isPPC64, bool isTailCall,
+ bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
+ SDLoc dl) {
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ if (!isTailCall) {
+ if (isVector) {
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ }
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0));
+ // Calculate and remember argument location.
+ } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
+ TailCallArguments);
+}
+
+static
+void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+ SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDValue LROp, SDValue FPOp, bool isDarwinABI,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Emit a sequence of copyto/copyfrom virtual registers for arguments that
+ // might overwrite each other in case of tail call optimization.
+ SmallVector<SDValue, 8> MemOpChains2;
+ // Do not flag preceding copytoreg stuff together with the following stuff.
+ InFlag = SDValue();
+ StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
+ MemOpChains2, dl);
+ if (!MemOpChains2.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
+
+ // Store the return address to the appropriate stack slot.
+ Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
+ isPPC64, isDarwinABI, dl);
+
+ // Emit callseq_end just before tailcall node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
+ InFlag = Chain.getValue(1);
+}
+
+static
+unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
+ SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+ SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
+ const PPCSubtarget &Subtarget) {
+
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ NodeTys.push_back(MVT::Other); // Returns a chain
+ NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use.
+
+ unsigned CallOpc = PPCISD::CALL;
+
+ bool needIndirectCall = true;
+ if (!isSVR4ABI || !isPPC64)
+ if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
+ // If this is an absolute destination address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ needIndirectCall = false;
+ }
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201
+ // Use indirect calls for ALL functions calls in JIT mode, since the
+ // far-call stubs may be outside relocation limits for a BL instruction.
+ if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
+ unsigned OpFlags = 0;
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
+ (G->getGlobal()->isDeclaration() ||
+ G->getGlobal()->isWeakForLinker())) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ !G->getGlobal()->hasLocalLinkage() &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_PLT_OR_STUB;
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress /
+ // TargetExternalSymbol node so that legalize doesn't hack it.
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType(),
+ 0, OpFlags);
+ needIndirectCall = false;
+ }
+ }
+
+ if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ unsigned char OpFlags = 0;
+
+ if ((DAG.getTarget().getRelocationModel() != Reloc::Static &&
+ (Subtarget.getTargetTriple().isMacOSX() &&
+ Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) ||
+ (Subtarget.isTargetELF() && !isPPC64 &&
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_) ) {
+ // PC-relative references to external symbols should go through $stub,
+ // unless we're building with the leopard linker or later, which
+ // automatically synthesizes these stubs.
+ OpFlags = PPCII::MO_PLT_OR_STUB;
+ }
+
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
+ OpFlags);
+ needIndirectCall = false;
+ }
+
+ if (needIndirectCall) {
+ // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
+ // to do the call, we can't use PPCISD::CALL.
+ SDValue MTCTROps[] = {Chain, Callee, InFlag};
+
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
+ // Function pointers in the 64-bit SVR4 ABI do not point to the function
+ // entry point, but to the function descriptor (the function entry point
+ // address is part of the function descriptor though).
+ // The function descriptor is a three doubleword structure with the
+ // following fields: function entry point, TOC base address and
+ // environment pointer.
+ // Thus for a call through a function pointer, the following actions need
+ // to be performed:
+ // 1. Save the TOC of the caller in the TOC save area of its stack
+ // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
+ // 2. Load the address of the function entry point from the function
+ // descriptor.
+ // 3. Load the TOC of the callee from the function descriptor into r2.
+ // 4. Load the environment pointer from the function descriptor into
+ // r11.
+ // 5. Branch to the function entry point address.
+ // 6. On return of the callee, the TOC of the caller needs to be
+ // restored (this is done in FinishCall()).
+ //
+ // All those operations are flagged together to ensure that no other
+ // operations can be scheduled in between. E.g. without flagging the
+ // operations together, a TOC access in the caller could be scheduled
+ // between the load of the callee TOC and the branch to the callee, which
+ // results in the TOC access going through the TOC of the callee instead
+ // of going through the TOC of the caller, which leads to incorrect code.
+
+ // Load the address of the function entry point from the function
+ // descriptor.
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
+ Chain = LoadFuncPtr.getValue(1);
+ InFlag = LoadFuncPtr.getValue(2);
+
+ // Load environment pointer into r11.
+ // Offset of the environment pointer within the function descriptor.
+ SDValue PtrOff = DAG.getIntPtrConstant(16);
+
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
+ SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
+ InFlag);
+ Chain = LoadEnvPtr.getValue(1);
+ InFlag = LoadEnvPtr.getValue(2);
+
+ SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
+ InFlag);
+ Chain = EnvVal.getValue(0);
+ InFlag = EnvVal.getValue(1);
+
+ // Load TOC of the callee into r2. We are using a target-specific load
+ // with r2 hard coded, because the result of a target-independent load
+ // would never go directly into r2, since r2 is a reserved register (which
+ // prevents the register allocator from allocating it), resulting in an
+ // additional register being allocated and an unnecessary move instruction
+ // being generated.
+ VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue TOCOff = DAG.getIntPtrConstant(8);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
+ SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
+ AddTOC, InFlag);
+ Chain = LoadTOCPtr.getValue(0);
+ InFlag = LoadTOCPtr.getValue(1);
+
+ MTCTROps[0] = Chain;
+ MTCTROps[1] = LoadFuncPtr;
+ MTCTROps[2] = InFlag;
+ }
+
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
+ InFlag = Chain.getValue(1);
+
+ NodeTys.clear();
+ NodeTys.push_back(MVT::Other);
+ NodeTys.push_back(MVT::Glue);
+ Ops.push_back(Chain);
+ CallOpc = PPCISD::BCTRL;
+ Callee.setNode(nullptr);
+ // Add use of X11 (holding environment pointer)
+ if (isSVR4ABI && isPPC64 && !isELFv2ABI)
+ Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
+ // Add CTR register as callee so a bctr can be emitted later.
+ if (isTailCall)
+ Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
+ }
+
+ // If this is a direct call, pass the chain and the callee.
+ if (Callee.getNode()) {
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+ }
+ // If this is a tail call add stack pointer delta.
+ if (isTailCall)
+ Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+ if (Callee.getNode() && isELFv2ABI)
+ Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
+ return CallOpc;
+}
+
+static
+bool isLocalCall(const SDValue &Callee)
+{
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ return !G->getGlobal()->isDeclaration() &&
+ !G->getGlobal()->isWeakForLinker();
+ return false;
+}
+
+SDValue
+PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl,
+ VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::ZExt:
+ Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ case CCValAssign::SExt:
+ Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
+ DAG.getValueType(VA.getValVT()));
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ break;
+ }
+
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+SDValue
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
+ bool isTailCall, bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ std::vector<EVT> NodeTys;
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
+ isTailCall, RegsToPass, Ops, NodeTys,
+ Subtarget);
+
+ // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
+ if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
+ Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
+
+ // When performing tail call optimization the callee pops its arguments off
+ // the stack. Account for this here so these bytes can be pushed back on in
+ // PPCFrameLowering::eliminateCallFramePseudoInstr.
+ int BytesCalleePops =
+ (CallConv == CallingConv::Fast &&
+ getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;
+
+ // Add a register mask operand representing the call-preserved registers.
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ // Emit tail call.
+ if (isTailCall) {
+ assert(((Callee.getOpcode() == ISD::Register &&
+ cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
+ Callee.getOpcode() == ISD::TargetExternalSymbol ||
+ Callee.getOpcode() == ISD::TargetGlobalAddress ||
+ isa<ConstantSDNode>(Callee)) &&
+ "Expecting an global address, external symbol, absolute value or register");
+
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
+ }
+
+ // Add a NOP immediately after the branch instruction when using the 64-bit
+ // SVR4 ABI. At link time, if caller and callee are in a different module and
+ // thus have a different TOC, the call will be replaced with a call to a stub
+ // function which saves the current TOC, loads the TOC of the callee and
+ // branches to the callee. The NOP will be replaced with a load instruction
+ // which restores the TOC of the caller from the TOC save slot of the current
+ // stack frame. If caller and callee belong to the same module (and have the
+ // same TOC), the NOP will remain unchanged.
+
+ bool needsTOCRestore = false;
+ if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
+ if (CallOpc == PPCISD::BCTRL) {
+ // This is a call through a function pointer.
+ // Restore the caller TOC from the save area into R2.
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ // We are using a target-specific load with r2 hard coded, because the
+ // result of a target-independent load would never go directly into r2,
+ // since r2 is a reserved register (which prevents the register allocator
+ // from allocating it), resulting in an additional register being
+ // allocated and an unnecessary move instruction being generated.
+ needsTOCRestore = true;
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
+ // Otherwise insert NOP for non-local calls.
+ CallOpc = PPCISD::CALL_NOP;
+ }
+ }
+
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ if (needsTOCRestore) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
+ Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(BytesCalleePops, true),
+ InFlag, dl);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ if (isTailCall)
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
+ Ins, DAG);
+
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
+ if (Subtarget.isSVR4ABI()) {
+ if (Subtarget.isPPC64())
+ return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ else
+ return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+ }
+
+ return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
+ isTailCall, Outs, OutVals, Ins,
+ dl, DAG, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
+ // of the 32-bit SVR4 ABI stack frame layout.
+
+ assert((CallConv == CallingConv::C ||
+ CallConv == CallingConv::Fast) && "Unknown calling convention!");
+
+ unsigned PtrByteSize = 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, parameter list area and the part of the local variable space which
+ // contains copies of aggregates which are passed by value.
+
+ // Assign locations to all of the outgoing arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // Reserve space for the linkage area on the stack.
+ CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false),
+ PtrByteSize);
+
+ if (isVarArg) {
+ // Handle fixed and variable vector arguments differently.
+ // Fixed vector arguments go into registers as long as registers are
+ // available. Variable vector arguments always go into memory.
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ bool Result;
+
+ if (Outs[i].IsFixed) {
+ Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
+ CCInfo);
+ } else {
+ Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
+ ArgFlags, CCInfo);
+ }
+
+ if (Result) {
+#ifndef NDEBUG
+ errs() << "Call operand #" << i << " has unhandled type "
+ << EVT(ArgVT).getEVTString() << "\n";
+#endif
+ llvm_unreachable(nullptr);
+ }
+ }
+ } else {
+ // All arguments are treated the same.
+ CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
+ }
+
+ // Assign locations to all of the outgoing aggregate by value arguments.
+ SmallVector<CCValAssign, 16> ByValArgLocs;
+ CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ByValArgLocs, *DAG.getContext());
+
+ // Reserve stack space for the allocations in CCInfo.
+ CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);
+
+ CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
+
+ // Size of the linkage area, parameter list area and the part of the local
+ // space variable where copies of aggregates which are passed by value are
+ // stored.
+ unsigned NumBytes = CCByValInfo.getNextStackOffset();
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be moved somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+ SmallVector<SDValue, 8> MemOpChains;
+
+ bool seenFloatArg = false;
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, j = 0, e = ArgLocs.size();
+ i != e;
+ ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ if (Flags.isByVal()) {
+ // Argument is an aggregate which is passed by value, thus we need to
+ // create a copy of it in the local variable space of the current stack
+ // frame (which is the stack frame of the caller) and pass the address of
+ // this copy to the callee.
+ assert((j < ByValArgLocs.size()) && "Index out of bounds!");
+ CCValAssign &ByValVA = ByValArgLocs[j++];
+ assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
+
+ // Memory reserved in the local variable space of the callers stack frame.
+ unsigned LocMemOffset = ByValVA.getLocMemOffset();
+
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ // Create a copy of the argument in the local area of the current
+ // stack frame.
+ SDValue MemcpyCall =
+ CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+
+ // This must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ Chain = CallSeqStart = NewCallSeqStart;
+
+ // Pass the address of the aggregate copy on the stack either in a
+ // physical register or in the parameter list area of the current stack
+ // frame to the callee.
+ Arg = PtrOff;
+ }
+
+ if (VA.isRegLoc()) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg);
+
+ seenFloatArg |= VA.getLocVT().isFloatingPoint();
+ // Put argument in a physical register.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ // Put argument in the parameter list area of the current stack frame.
+ assert(VA.isMemLoc());
+ unsigned LocMemOffset = VA.getLocMemOffset();
+
+ if (!isTailCall) {
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ } else {
+ // Calculate and remember argument location.
+ CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
+ TailCallArguments);
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Set CR bit 6 to true if this is a vararg call with floating args passed in
+ // registers.
+ if (isVarArg) {
+ SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue Ops[] = { Chain, InFlag };
+
+ Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
+ dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
+
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
+ false, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+// Copy an argument into memory, being careful to do this outside the
+// call sequence for the call to which the argument belongs.
+SDValue
+PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart,
+ ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG,
+ SDLoc dl) const {
+ SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
+ CallSeqStart.getNode()->getOperand(0),
+ Flags, DAG, dl);
+ // The MEMCPY must go outside the CALLSEQ_START..END.
+ SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
+ DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
+ NewCallSeqStart.getNode());
+ return NewCallSeqStart;
+}
+
+SDValue
+PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ bool isELFv2ABI = Subtarget.isELFv2ABI();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ unsigned PtrByteSize = 8;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
+ // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
+ // area is 32 bytes reserved space for [SP][CR][LR][TOC].
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
+ isELFv2ABI);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ NumBytes = ((NumBytes + Align - 1) / Align) * Align;
+
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ if (Flags.isInConsecutiveRegsLast())
+ NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ }
+
+ unsigned NumBytesActuallyUsed = NumBytes;
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if its varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be move somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
+
+ static const MCPhysReg GPR[] = {
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg *FPR = GetFPR();
+
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ static const MCPhysReg VSRH[] = {
+ PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
+ PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
+ };
+
+ const unsigned NumGPRs = array_lengthof(GPR);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ EVT OrigVT = Outs[i].ArgVT;
+
+ /* Respect alignment of argument on the stack. */
+ unsigned Align =
+ CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
+ ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
+
+ /* Compute GPR index associated with argument offset. */
+ GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
+ GPR_idx = std::min(GPR_idx, NumGPRs);
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // Promote integers to 64-bit values.
+ if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal()) {
+ // Note: Size includes alignment padding, so
+ // struct x { short a; char b; }
+ // will have Size = 4. With #pragma pack(1), it will have Size = 3.
+ // These are the proper values we need for right-justifying the
+ // aggregate in a parameter register.
+ unsigned Size = Flags.getByValSize();
+
+ // An empty aggregate parameter takes up no storage and no
+ // registers.
+ if (Size == 0)
+ continue;
+
+ // All aggregates smaller than 8 bytes must be passed right-justified.
+ if (Size==1 || Size==2 || Size==4) {
+ EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ }
+
+ if (GPR_idx == NumGPRs && Size < 8) {
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+
+ // FIXME: The above statement is likely due to a misunderstanding of the
+ // documents. All arguments must be copied into the parameter area BY
+ // THE CALLEE in the event that the callee takes the address of any
+ // formal argument. That has not yet been implemented. However, it is
+ // reasonable to use the stack area as a staging area for the register
+ // load.
+
+ // Skip this for small aggregates, as we will use the same slot for a
+ // right-justified copy, below.
+ if (Size >= 8)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // When a register is available, pass a small aggregate right-justified.
+ if (Size < 8 && GPR_idx != NumGPRs) {
+ // The easiest way to get this right-justified in a register
+ // is to copy the structure into the rightmost portion of a
+ // local variable slot, then load the whole slot into the
+ // register.
+ // FIXME: The memcpy seems to produce pretty awful code for
+ // small aggregates, particularly for packed ones.
+ // FIXME: It would be preferable to use the slot in the
+ // parameter save area instead of a new local variable.
+ SDValue AddPtr = PtrOff;
+ if (!isLittleEndian) {
+ SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
+ AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ }
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // Load the slot into the register.
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load));
+
+ // Done with this argument.
+ ArgOffset += PtrByteSize;
+ continue;
+ }
+
+ // For aggregates larger than PtrByteSize, copy the pieces of the
+ // object that fit into registers from the parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ // These can be scalar arguments or elements of an integer array type
+ // passed directly. Clang may use those instead of "byval" aggregate
+ // types to avoid forcing arguments to memory unnecessarily.
+ if (GPR_idx != NumGPRs) {
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64: {
+ // These can be scalar arguments or elements of a float array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // float aggregates.
+
+ // Named arguments go into FPRs first, and once they overflow, the
+ // remaining arguments go into GPRs and then the parameter save area.
+ // Unnamed arguments for vararg functions always go to GPRs and
+ // then the parameter save area. For now, put all arguments to vararg
+ // routines always in both locations (FPR *and* GPR or stack slot).
+ bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
+
+ // First load the argument into the next available FPR.
+ if (FPR_idx != NumFPRs)
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ // Next, load the argument into GPR or stack slot if needed.
+ if (!NeedGPROrStack)
+ ;
+ else if (GPR_idx != NumGPRs) {
+ // In the non-vararg case, this can only ever happen in the
+ // presence of f32 array types, since otherwise we never run
+ // out of FPRs before running out of GPRs.
+ SDValue ArgVal;
+
+ // Double values are always passed in a single GPR.
+ if (Arg.getValueType() != MVT::f32) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
+
+ // Non-array float values are extended and passed in a GPR.
+ } else if (!Flags.isInConsecutiveRegs()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+
+ // If we have an array of floats, we collect every odd element
+ // together with its predecessor into one GPR.
+ } else if (ArgOffset % PtrByteSize != 0) {
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
+ Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ if (!isLittleEndian)
+ std::swap(Lo, Hi);
+ ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
+
+ // The final element, if even, goes into the first half of a GPR.
+ } else if (Flags.isInConsecutiveRegsLast()) {
+ ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
+ ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
+ if (!isLittleEndian)
+ ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
+ DAG.getConstant(32, MVT::i32));
+
+ // Non-final even elements are skipped; they will be handled
+ // together the with subsequent argument on the next go-around.
+ } else
+ ArgVal = SDValue();
+
+ if (ArgVal.getNode())
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
+ } else {
+ // Single-precision floating-point values are mapped to the
+ // second (rightmost) word of the stack doubleword.
+ if (Arg.getValueType() == MVT::f32 &&
+ !isLittleEndian && !Flags.isInConsecutiveRegs()) {
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ }
+
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ // When passing an array of floats, the array occupies consecutive
+ // space in the argument area; only round up to the next doubleword
+ // at the end of the array. Otherwise, each float takes 8 bytes.
+ ArgOffset += (Arg.getValueType() == MVT::f32 &&
+ Flags.isInConsecutiveRegs()) ? 4 : 8;
+ if (Flags.isInConsecutiveRegsLast())
+ ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
+ break;
+ }
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ case MVT::v2f64:
+ case MVT::v2i64:
+ // These can be scalar arguments or elements of a vector array type
+ // passed directly. The latter are used to implement ELFv2 homogenous
+ // vector aggregates.
+
+ // For a varargs call, named arguments go into VRs or on the stack as
+ // usual; unnamed arguments always go to the stack or the corresponding
+ // GPRs when within range. For now, we always put the value in both
+ // locations (or even all three).
+ if (isVarArg) {
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params go into VRs or on the stack.
+ if (VR_idx != NumVRs) {
+ unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
+ Arg.getSimpleValueType() == MVT::v2i64) ?
+ VSRH[VR_idx] : VR[VR_idx];
+ ++VR_idx;
+
+ RegsToPass.push_back(std::make_pair(VReg, Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ true, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += 16;
+ break;
+ }
+ }
+
+ assert(NumBytesActuallyUsed == ArgOffset);
+ (void)NumBytesActuallyUsed;
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ // Check if this is an indirect call (MTCTR/BCTRL).
+ // See PrepareCall() for more information about calls through function
+ // pointers in the 64-bit SVR4 ABI.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ // Load r2 into a virtual register and store it to the TOC save area.
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
+ // TOC save area offset.
+ unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
+ SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+ Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
+ false, false, 0);
+ // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+ // This does not mean the MTCTR instruction must use R12; it's easier
+ // to model this as an extra parameter, so do that.
+ if (isELFv2ABI)
+ RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+SDValue
+PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ unsigned NumOps = Outs.size();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+ unsigned PtrByteSize = isPPC64 ? 8 : 4;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Mark this function as potentially containing a function that contains a
+ // tail call. As a consequence the frame pointer will be used for dynamicalloc
+ // and restoring the callers stack pointer in this functions epilog. This is
+ // done because by tail calling the called function might overwrite the value
+ // in this function's (MF) stack pointer stack slot 0(SP).
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
+
+ // Count how many bytes are to be pushed on the stack, including the linkage
+ // area, and parameter passing area. We start with 24/48 bytes, which is
+ // prereserved space for [SP][CR][LR][3 x unused].
+ unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
+ false);
+ unsigned NumBytes = LinkageSize;
+
+ // Add up all the space actually used.
+ // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
+ // they all go in registers, but we must reserve stack space for them for
+ // possible use by the caller. In varargs or 64-bit calls, parameters are
+ // assigned stack space in order, with padding so Altivec parameters are
+ // 16-byte aligned.
+ unsigned nAltivecParamsAtEnd = 0;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ EVT ArgVT = Outs[i].VT;
+ // Varargs Altivec parameters are padded to a 16 byte boundary.
+ if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
+ ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
+ ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
+ if (!isVarArg && !isPPC64) {
+ // Non-varargs Altivec parameters go after all the non-Altivec
+ // parameters; handle those later so we know how much padding we need.
+ nAltivecParamsAtEnd++;
+ continue;
+ }
+ // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
+ NumBytes = ((NumBytes+15)/16)*16;
+ }
+ NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
+ }
+
+ // Allow for Altivec parameters at the end, if needed.
+ if (nAltivecParamsAtEnd) {
+ NumBytes = ((NumBytes+15)/16)*16;
+ NumBytes += 16*nAltivecParamsAtEnd;
+ }
+
+ // The prolog code of the callee may store up to 8 GPR argument registers to
+ // the stack, allowing va_start to index over them in memory if its varargs.
+ // Because we cannot tell if this is needed on the caller side, we have to
+ // conservatively assume that it is needed. As such, make sure we have at
+ // least enough stack space for the caller to store the 8 GPRs.
+ NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
+
+ // Tail call needs the stack to be aligned.
+ if (getTargetMachine().Options.GuaranteedTailCallOpt &&
+ CallConv == CallingConv::Fast)
+ NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
+
+ // Calculate by how many bytes the stack has to be adjusted in case of tail
+ // call optimization.
+ int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
+
+ // To protect arguments on the stack from being clobbered in a tail call,
+ // force all the loads to happen before doing any other lowering.
+ if (isTailCall)
+ Chain = DAG.getStackArgumentTokenFactor(Chain);
+
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
+ SDValue CallSeqStart = Chain;
+
+ // Load the return address and frame pointer so it can be move somewhere else
+ // later.
+ SDValue LROp, FPOp;
+ Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
+ dl);
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr;
+ if (isPPC64)
+ StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
+ else
+ StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory. Also, if this is a vararg function, floating point operations
+ // must be stored to our stack, and loaded into integer regs as well, if
+ // any integer regs are available for argument passing.
+ unsigned ArgOffset = LinkageSize;
+ unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
+
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
+ PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10,
+ };
+ static const MCPhysReg *FPR = GetFPR();
+
+ static const MCPhysReg VR[] = {
+ PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
+ PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
+ };
+ const unsigned NumGPRs = array_lengthof(GPR_32);
+ const unsigned NumFPRs = 13;
+ const unsigned NumVRs = array_lengthof(VR);
+
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
+
+ SmallVector<SDValue, 8> MemOpChains;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff;
+
+ PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ // On PPC64, promote integers to 64-bit values.
+ if (isPPC64 && Arg.getValueType() == MVT::i32) {
+ // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
+ unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
+ }
+
+ // FIXME memcpy is used way more than necessary. Correctness first.
+ // Note: "by value" is code for passing a structure by value, not
+ // basic types.
+ if (Flags.isByVal()) {
+ unsigned Size = Flags.getByValSize();
+ // Very small objects are passed right-justified. Everything else is
+ // passed left-justified.
+ if (Size==1 || Size==2) {
+ EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ MachinePointerInfo(), VT,
+ false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+
+ ArgOffset += PtrByteSize;
+ } else {
+ SDValue Const = DAG.getConstant(PtrByteSize - Size,
+ PtrOff.getValueType());
+ SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
+ CallSeqStart,
+ Flags, DAG, dl);
+ ArgOffset += PtrByteSize;
+ }
+ continue;
+ }
+ // Copy entire object into memory. There are cases where gcc-generated
+ // code assumes it is there, even if it could be put entirely into
+ // registers. (This is not what the doc says.)
+ Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
+ CallSeqStart,
+ Flags, DAG, dl);
+
+ // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
+ // copy the pieces of the object that fit into registers from the
+ // parameter save area.
+ for (unsigned j=0; j<Size; j+=PtrByteSize) {
+ SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
+ SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ ArgOffset += PtrByteSize;
+ } else {
+ ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
+ break;
+ }
+ }
+ continue;
+ }
+
+ switch (Arg.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i1:
+ case MVT::i32:
+ case MVT::i64:
+ if (GPR_idx != NumGPRs) {
+ if (Arg.getValueType() == MVT::i1)
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);
+
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
+ } else {
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ }
+ ArgOffset += PtrByteSize;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ if (FPR_idx != NumFPRs) {
+ RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
+
+ if (isVarArg) {
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+
+ // Float varargs are always shadowed in available integer registers
+ if (GPR_idx != NumGPRs) {
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(), false, false,
+ false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
+ SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ } else {
+ // If we have any FPRs remaining, we may also have GPRs remaining.
+ // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
+ // GPRs.
+ if (GPR_idx != NumGPRs)
+ ++GPR_idx;
+ if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
+ !isPPC64) // PPC64 has 64-bit GPR's obviously :)
+ ++GPR_idx;
+ }
+ } else
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, false, MemOpChains,
+ TailCallArguments, dl);
+ if (isPPC64)
+ ArgOffset += 8;
+ else
+ ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
+ break;
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (isVarArg) {
+ // These go aligned on the stack, or in the corresponding R registers
+ // when within range. The Darwin PPC ABI doc claims they also go in
+ // V registers; in fact gcc does this only for arguments that are
+ // prototyped, not for those that match the ... We do it for all
+ // arguments, seems to work.
+ while (ArgOffset % 16 !=0) {
+ ArgOffset += PtrByteSize;
+ if (GPR_idx != NumGPRs)
+ GPR_idx++;
+ }
+ // We could elide this store in the case where the object fits
+ // entirely in R registers. Maybe later.
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
+ DAG.getConstant(ArgOffset, PtrVT));
+ SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(), false, false, 0);
+ MemOpChains.push_back(Store);
+ if (VR_idx != NumVRs) {
+ SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
+ MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
+ }
+ ArgOffset += 16;
+ for (unsigned i=0; i<16; i+=PtrByteSize) {
+ if (GPR_idx == NumGPRs)
+ break;
+ SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
+ DAG.getConstant(i, PtrVT));
+ SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
+ false, false, false, 0);
+ MemOpChains.push_back(Load.getValue(1));
+ RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
+ }
+ break;
+ }
+
+ // Non-varargs Altivec params generally go in registers, but have
+ // stack space allocated at the end.
+ if (VR_idx != NumVRs) {
+ // Doesn't have GPR space allocated.
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
+ } else if (nAltivecParamsAtEnd==0) {
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ break;
+ }
+ }
+ // If all Altivec parameters fit in registers, as they usually do,
+ // they get stack space following the non-Altivec parameters. We
+ // don't track this here because nobody below needs it.
+ // If there are more Altivec parameters than fit in registers emit
+ // the stores here.
+ if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
+ unsigned j = 0;
+ // Offset is aligned; skip 1st 12 params which go in V registers.
+ ArgOffset = ((ArgOffset+15)/16)*16;
+ ArgOffset += 12*16;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
+ if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
+ ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
+ if (++j > NumVRs) {
+ SDValue PtrOff;
+ // We are emitting Altivec params in order.
+ LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
+ isPPC64, isTailCall, true, MemOpChains,
+ TailCallArguments, dl);
+ ArgOffset += 16;
+ }
+ }
+ }
+ }
+
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
+
+ // On Darwin, R12 must contain the address of an indirect callee. This does
+ // not mean the MTCTR instruction must use R12; it's easier to model this as
+ // an extra parameter, so do that.
+ if (!isTailCall &&
+ !dyn_cast<GlobalAddressSDNode>(Callee) &&
+ !dyn_cast<ExternalSymbolSDNode>(Callee) &&
+ !isBLACompatibleAddress(Callee, DAG))
+ RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
+ PPC::R12), Callee));
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ if (isTailCall)
+ PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
+ FPOp, true, TailCallArguments);
+
+ return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
+ RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
+ Ins, InVals);
+}
+
+bool
+PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_PPC);
+}
+
+SDValue
+PPCTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = OutVals[i];
+
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ RetOps[0] = Chain; // Update chain.
+
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
+}
+
+SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // When we pop the dynamic allocation we need to restore the SP link.
+ SDLoc dl(Op);
+
+ // Get the corect type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Construct the stack pointer operand.
+ bool isPPC64 = Subtarget.isPPC64();
+ unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
+ SDValue StackPtr = DAG.getRegister(SP, PtrVT);
+
+ // Get the operands for the STACKRESTORE.
+ SDValue Chain = Op.getOperand(0);
+ SDValue SaveSP = Op.getOperand(1);
+
+ // Load the old link SP.
+ SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
+ MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Restore the stack pointer.
+ Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
+
+ // Store the old link SP.
+ return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
+ false, false, 0);
+}
+
+
+
+SDValue
+PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int RASI = FI->getReturnAddrSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!RASI) {
+ // Find out what the fix offset of the frame pointer save area.
+ int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for frame pointer save area.
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
+ // Save the result.
+ FI->setReturnAddrSaveIndex(RASI);
+ }
+ return DAG.getFrameIndex(RASI, PtrVT);
+}
+
+SDValue
+PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Get current frame pointer save index. The users of this index will be
+ // primarily DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+
+ // If the frame pointer save index hasn't been defined yet.
+ if (!FPSI) {
+ // Find out what the fix offset of the frame pointer save area.
+ int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
+ isDarwinABI);
+
+ // Allocate the frame index for frame pointer save area.
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ // Save the result.
+ FI->setFramePointerSaveIndex(FPSI);
+ }
+ return DAG.getFrameIndex(FPSI, PtrVT);
+}
+
+SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ // Get the inputs.
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ // Get the corect type for pointers.
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Negate the size.
+ SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
+ DAG.getConstant(0, PtrVT), Size);
+ // Construct a node for the frame pointer save index.
+ SDValue FPSIdx = getFramePointerFrameIndex(DAG);
+ // Build a DYNALLOC node.
+ SDValue Ops[3] = { Chain, NegSize, FPSIdx };
+ SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
+ Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 loads");
+
+ // First, load 8 bits into 32 bits, then truncate to 1 bit.
+
+ SDLoc dl(Op);
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+
+ SDValue Chain = LD->getChain();
+ SDValue BasePtr = LD->getBasePtr();
+ MachineMemOperand *MMO = LD->getMemOperand();
+
+ SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
+ BasePtr, MVT::i8, MMO);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
+
+ SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getOperand(1).getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 stores");
+
+ // First, zero extend to 32 bits, then use a truncating store to 8 bits.
+
+ SDLoc dl(Op);
+ StoreSDNode *ST = cast<StoreSDNode>(Op);
+
+ SDValue Chain = ST->getChain();
+ SDValue BasePtr = ST->getBasePtr();
+ SDValue Value = ST->getValue();
+ MachineMemOperand *MMO = ST->getMemOperand();
+
+ Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
+ return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
+}
+
+// FIXME: Remove this once the ANDI glue bug is fixed:
+SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::i1 &&
+ "Custom lowering only for i1 results");
+
+ SDLoc DL(Op);
+ return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
+ Op.getOperand(0));
+}
+
+/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
+/// possible.
+SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ // Not FP? Not a fsel.
+ if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
+ !Op.getOperand(2).getValueType().isFloatingPoint())
+ return Op;
+
+ // We might be able to do better than this under some circumstances, but in
+ // general, fsel-based lowering of select is a finite-math-only optimization.
+ // For more information, see section F.3 of the 2.06 ISA specification.
+ if (!DAG.getTarget().Options.NoInfsFPMath ||
+ !DAG.getTarget().Options.NoNaNsFPMath)
+ return Op;
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+
+ EVT ResVT = Op.getValueType();
+ EVT CmpVT = Op.getOperand(0).getValueType();
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
+ SDLoc dl(Op);
+
+ // If the RHS of the comparison is a 0.0, we don't need to do the
+ // subtraction at all.
+ SDValue Sel1;
+ if (isFloatingPointZero(RHS))
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
+ case ISD::SETULT:
+ case ISD::SETLT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
+ LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
+ }
+
+ SDValue Cmp;
+ switch (CC) {
+ default: break; // SETUO etc aren't handled by fsel.
+ case ISD::SETNE:
+ std::swap(TV, FV);
+ case ISD::SETEQ:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT,
+ DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOGE:
+ case ISD::SETGE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
+ case ISD::SETOLE:
+ case ISD::SETLE:
+ Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
+ if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
+ Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
+ return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
+ }
+ return Op;
+}
+
+// FIXME: Split this code up when LegalizeDAGTypes lands.
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ SDLoc dl) const {
+ assert(Op.getOperand(0).getValueType().isFloatingPoint());
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() == MVT::f32)
+ Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
+
+ SDValue Tmp;
+ switch (Op.getSimpleValueType().SimpleTy) {
+ default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
+ case MVT::i32:
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+ (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ :
+ PPCISD::FCTIDZ),
+ dl, MVT::f64, Src);
+ break;
+ case MVT::i64:
+ assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
+ "i64 FP_TO_UINT is supported only with FPCVT");
+ Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
+ PPCISD::FCTIDUZ,
+ dl, MVT::f64, Src);
+ break;
+ }
+
+ // Convert the FP value to an int value through memory.
+ bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
+ (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
+ SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
+ int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI);
+
+ // Emit a store to the stack slot.
+ SDValue Chain;
+ if (i32Stack) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
+ SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
+ Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
+ } else
+ Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
+ MPI, false, false, 0);
+
+ // Result is a load from the stack slot. If loading 4 bytes, make sure to
+ // add in a bias.
+ if (Op.getValueType() == MVT::i32 && !i32Stack) {
+ FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
+ DAG.getConstant(4, FIPtr.getValueType()));
+ MPI = MachinePointerInfo();
+ }
+
+ return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI,
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // Don't handle ppc_fp128 here; let it be lowered to a libcall.
+ if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
+ return SDValue();
+
+ if (Op.getOperand(0).getValueType() == MVT::i1)
+ return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
+ DAG.getConstantFP(1.0, Op.getValueType()),
+ DAG.getConstantFP(0.0, Op.getValueType()));
+
+ assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
+ "UINT_TO_FP is supported only with FPCVT");
+
+ // If we have FCFIDS, then use it when converting to single-precision.
+ // Otherwise, convert to double-precision and then round.
+ unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDUS : PPCISD::FCFIDS) :
+ (Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::FCFIDU : PPCISD::FCFID);
+ MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
+ MVT::f32 : MVT::f64;
+
+ if (Op.getOperand(0).getValueType() == MVT::i64) {
+ SDValue SINT = Op.getOperand(0);
+ // When converting to single-precision, we actually need to convert
+ // to double-precision first and then round to single-precision.
+ // To avoid double-rounding effects during that operation, we have
+ // to prepare the input operand. Bits that might be truncated when
+ // converting to double-precision are replaced by a bit that won't
+ // be lost at this stage, but is below the single-precision rounding
+ // position.
+ //
+ // However, if -enable-unsafe-fp-math is in effect, accept double
+ // rounding to avoid the extra overhead.
+ if (Op.getValueType() == MVT::f32 &&
+ !Subtarget.hasFPCVT() &&
+ !DAG.getTarget().Options.UnsafeFPMath) {
+
+ // Twiddle input to make sure the low 11 bits are zero. (If this
+ // is the case, we are guaranteed the value will fit into the 53 bit
+ // mantissa of an IEEE double-precision value without rounding.)
+ // If any of those low 11 bits were not zero originally, make sure
+ // bit 12 (value 2048) is set instead, so that the final rounding
+ // to single-precision gets the correct result.
+ SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ SINT, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Round, DAG.getConstant(2047, MVT::i64));
+ Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
+ Round = DAG.getNode(ISD::AND, dl, MVT::i64,
+ Round, DAG.getConstant(-2048, MVT::i64));
+
+ // However, we cannot use that value unconditionally: if the magnitude
+ // of the input value is small, the bit-twiddling we did above might
+ // end up visibly changing the output. Fortunately, in that case, we
+ // don't need to twiddle bits since the original input will convert
+ // exactly to double-precision floating-point already. Therefore,
+ // construct a conditional to use the original value if the top 11
+ // bits are all sign-bit copies, and use the rounded value computed
+ // above otherwise.
+ SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
+ SINT, DAG.getConstant(53, MVT::i32));
+ Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
+ Cond, DAG.getConstant(1, MVT::i64));
+ Cond = DAG.getSetCC(dl, MVT::i32,
+ Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT);
+
+ SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
+ }
+
+ SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);
+
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
+ FP = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+ }
+
+ assert(Op.getOperand(0).getValueType() == MVT::i32 &&
+ "Unhandled INT_TO_FP type in custom expander!");
+ // Since we only generate this in 64-bit mode, we can take advantage of
+ // 64-bit registers. In particular, sign extend the input value into the
+ // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
+ // then lfd it and fcfid it.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SDValue Ld;
+ if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
+ int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
+ "Expected an i32 store");
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 4, 4);
+ SDValue Ops[] = { Store, FIdx };
+ Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
+ PPCISD::LFIWZX : PPCISD::LFIWAX,
+ dl, DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i32, MMO);
+ } else {
+ assert(Subtarget.isPPC64() &&
+ "i32->FP without LFIWAX supported only on PPC64");
+
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
+ Op.getOperand(0));
+
+ // STD the extended value into the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, 0);
+
+ // Load the value as a double.
+ Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx,
+ MachinePointerInfo::getFixedStack(FrameIdx),
+ false, false, false, 0);
+ }
+
+ // FCFID it and return it.
+ SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
+ if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
+ FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
+ return FP;
+}
+
+SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ /*
+ The rounding mode is in bits 30:31 of FPSR, and has the following
+ settings:
+ 00 Round to nearest
+ 01 Round to 0
+ 10 Round to +inf
+ 11 Round to -inf
+
+ FLT_ROUNDS, on the other hand, expects the following:
+ -1 Undefined
+ 0 Round to 0
+ 1 Round to nearest
+ 2 Round to +inf
+ 3 Round to -inf
+
+ To perform the conversion, we do:
+ ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
+ */
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT VT = Op.getValueType();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Save FP Control Word to register
+ EVT NodeTys[] = {
+ MVT::f64, // return register
+ MVT::Glue // unused in this context
+ };
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
+
+ // Save FP register to stack slot
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
+ StackSlot, MachinePointerInfo(), false, false,0);
+
+ // Load FP Control Word from low 32 bits of stack slot.
+ SDValue Four = DAG.getConstant(4, PtrVT);
+ SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
+ SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
+ false, false, false, 0);
+
+ // Transform as necessary
+ SDValue CWD1 =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32));
+ SDValue CWD2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ DAG.getNode(ISD::XOR, dl, MVT::i32,
+ CWD, DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(3, MVT::i32)),
+ DAG.getConstant(1, MVT::i32));
+
+ SDValue RetVal =
+ DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
+
+ return DAG.getNode((VT.getSizeInBits() < 16 ?
+ ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
+}
+
+SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ SDLoc dl(Op);
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SHL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
+ SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRL!");
+
+ // Expand into a bunch of logical ops. Note that these ops
+ // depend on the PPC behavior for oversized shift amounts.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
+ SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
+ SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, dl);
+}
+
+SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ assert(Op.getNumOperands() == 3 &&
+ VT == Op.getOperand(1).getValueType() &&
+ "Unexpected SRA!");
+
+ // Expand into a bunch of logical ops, followed by a select_cc.
+ SDValue Lo = Op.getOperand(0);
+ SDValue Hi = Op.getOperand(1);
+ SDValue Amt = Op.getOperand(2);
+ EVT AmtVT = Amt.getValueType();
+
+ SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
+ DAG.getConstant(BitWidth, AmtVT), Amt);
+ SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
+ SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
+ SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
+ DAG.getConstant(-BitWidth, AmtVT));
+ SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
+ SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
+ SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
+ Tmp4, Tmp6, ISD::SETLE);
+ SDValue OutOps[] = { OutLo, OutHi };
+ return DAG.getMergeValues(OutOps, dl);
+}
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering.
+//
+
+/// BuildSplatI - Build a canonical splati of Val with an element size of
+/// SplatSize. Cast the result to VT.
+static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
+ SelectionDAG &DAG, SDLoc dl) {
+ assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
+
+ static const EVT VTys[] = { // canonical VT to use for each size.
+ MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
+ };
+
+ EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
+
+ // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
+ if (Val == -1)
+ SplatSize = 1;
+
+ EVT CanonicalVT = VTys[SplatSize-1];
+
+ // Build a canonical splat for this value.
+ SDValue Elt = DAG.getConstant(Val, MVT::i32);
+ SmallVector<SDValue, 8> Ops;
+ Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
+}
+
+/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
+ SelectionDAG &DAG, SDLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op);
+}
+
+/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, SDLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = LHS.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), LHS, RHS);
+}
+
+/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
+ SDValue Op2, SelectionDAG &DAG,
+ SDLoc dl, EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op0.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
+}
+
+
+/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
+/// amount. The result has the specified value type.
+static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
+ EVT VT, SelectionDAG &DAG, SDLoc dl) {
+ // Force LHS/RHS to be the right type.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
+
+ int Ops[16];
+ for (unsigned i = 0; i != 16; ++i)
+ Ops[i] = i + Amt;
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it. If we CAN select this case, and if it
+// selects to a single instruction, return Op. Otherwise, if we can codegen
+// this case more efficiently than a constant pool load, lower it to the
+// sequence of ops that should be used.
+SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+
+ // Check if this is a splat of a constant value.
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, 0, true) || SplatBitSize > 32)
+ return SDValue();
+
+ unsigned SplatBits = APSplatBits.getZExtValue();
+ unsigned SplatUndef = APSplatUndef.getZExtValue();
+ unsigned SplatSize = SplatBitSize / 8;
+
+ // First, handle single instruction cases.
+
+ // All zeros?
+ if (SplatBits == 0) {
+ // Canonicalize all zero vectors to be v4i32.
+ if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
+ SDValue Z = DAG.getConstant(0, MVT::i32);
+ Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
+ Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
+ }
+ return Op;
+ }
+
+ // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
+ int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
+ (32-SplatBitSize));
+ if (SextVal >= -16 && SextVal <= 15)
+ return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);
+
+
+ // Two instruction sequences.
+
+ // If this value is in the range [-32,30] and is even, use:
+ // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
+ // If this value is in the range [17,31] and is odd, use:
+ // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
+ // If this value is in the range [-31,-17] and is odd, use:
+ // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
+ // Note the last two are three-instruction sequences.
+ if (SextVal >= -32 && SextVal <= 31) {
+ // To avoid having these optimizations undone by constant folding,
+ // we convert to a pseudo that will be expanded later into one of
+ // the above forms.
+ SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
+ EVT VT = (SplatSize == 1 ? MVT::v16i8 :
+ (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
+ SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
+ SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ if (VT == Op.getValueType())
+ return RetVal;
+ else
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
+ }
+
+ // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
+ // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
+ // for fneg/fabs.
+ if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
+ // Make -1 and vspltisw -1:
+ SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);
+
+ // Make the VSLW intrinsic, computing 0x8000_0000.
+ SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
+ OnesV, DAG, dl);
+
+ // xor by OnesV to invert it.
+ Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // The remaining cases assume either big endian element order or
+ // a splat-size that equates to the element size of the vector
+ // to be built. An example that doesn't work for little endian is
+ // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits
+ // and a vector element size of 16 bits. The code below will
+ // produce the vector in big endian element order, which for little
+ // endian is {-1, 0, -1, 0, -1, 0, -1, 0}.
+
+ // For now, just avoid these optimizations in that case.
+ // FIXME: Develop correct optimizations for LE with mismatched
+ // splat and element sizes.
+
+ if (Subtarget.isLittleEndian() &&
+ SplatSize != Op.getValueType().getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ // Check to see if this is a wide variety of vsplti*, binop self cases.
+ static const signed char SplatCsts[] = {
+ -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
+ -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
+ };
+
+ for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+ // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
+ // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
+ int i = SplatCsts[idx];
+
+ // Figure out what shift amount will be used by altivec if shifted by i in
+ // this splat size.
+ unsigned TypeShiftAmt = i & (SplatBitSize-1);
+
+ // vsplti + shl self.
+ if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
+ Intrinsic::ppc_altivec_vslw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + srl self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
+ Intrinsic::ppc_altivec_vsrw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + sra self.
+ if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
+ Intrinsic::ppc_altivec_vsraw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // vsplti + rol self.
+ if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
+ ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
+ SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
+ static const unsigned IIDs[] = { // Intrinsic to use for each size.
+ Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
+ Intrinsic::ppc_altivec_vrlw
+ };
+ Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
+ }
+
+ // t = vsplti c, result = vsldoi t, t, 1
+ if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 2
+ if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
+ }
+ // t = vsplti c, result = vsldoi t, t, 3
+ if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
+ SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
+ return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
+ }
+ }
+
+ return SDValue();
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ SDLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VMRGHW,
+ OP_VMRGLW,
+ OP_VSPLTISW0,
+ OP_VSPLTISW1,
+ OP_VSPLTISW2,
+ OP_VSPLTISW3,
+ OP_VSLDOI4,
+ OP_VSLDOI8,
+ OP_VSLDOI12
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1*9+2)*9+3) return LHS;
+ assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+
+ int ShufIdxs[16];
+ switch (OpNum) {
+ default: llvm_unreachable("Unknown i32 permute!");
+ case OP_VMRGHW:
+ ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
+ ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
+ ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
+ ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
+ break;
+ case OP_VMRGLW:
+ ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
+ ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
+ ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
+ ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
+ break;
+ case OP_VSPLTISW0:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+0;
+ break;
+ case OP_VSPLTISW1:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+4;
+ break;
+ case OP_VSPLTISW2:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+8;
+ break;
+ case OP_VSPLTISW3:
+ for (unsigned i = 0; i != 16; ++i)
+ ShufIdxs[i] = (i&3)+12;
+ break;
+ case OP_VSLDOI4:
+ return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI8:
+ return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
+ case OP_VSLDOI12:
+ return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
+ }
+ EVT VT = OpLHS.getValueType();
+ OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
+ OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
+ SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
+ return DAG.getNode(ISD::BITCAST, dl, VT, T);
+}
+
+/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
+/// is a shuffle we can handle in a single instruction, return it. Otherwise,
+/// return the code it can be lowered into. Worst case, it can always be
+/// lowered into a vperm.
+SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
+ EVT VT = Op.getValueType();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+
+ // Cases that are handled by instructions that take permute immediates
+ // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
+ // selected by the instruction selector.
+ if (V2.getOpcode() == ISD::UNDEF) {
+ if (PPC::isSplatShuffleMask(SVOp, 1) ||
+ PPC::isSplatShuffleMask(SVOp, 2) ||
+ PPC::isSplatShuffleMask(SVOp, 4) ||
+ PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) {
+ return Op;
+ }
+ }
+
+ // Altivec has a variety of "shuffle immediates" that take two vector inputs
+ // and produce a fixed permutation. If any of these match, do not lower to
+ // VPERM.
+ unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
+ if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
+ PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
+ PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
+ PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG))
+ return Op;
+
+ // Check to see if this is a shuffle of 4-byte values. If so, we can use our
+ // perfect shuffle table to emit an optimal matching sequence.
+ ArrayRef<int> PermMask = SVOp->getMask();
+
+ unsigned PFIndexes[4];
+ bool isFourElementShuffle = true;
+ for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
+ unsigned EltNo = 8; // Start out undef.
+ for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
+ if (PermMask[i*4+j] < 0)
+ continue; // Undef, ignore it.
+
+ unsigned ByteSource = PermMask[i*4+j];
+ if ((ByteSource & 3) != j) {
+ isFourElementShuffle = false;
+ break;
+ }
+
+ if (EltNo == 8) {
+ EltNo = ByteSource/4;
+ } else if (EltNo != ByteSource/4) {
+ isFourElementShuffle = false;
+ break;
+ }
+ }
+ PFIndexes[i] = EltNo;
+ }
+
+ // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
+ // perfect shuffle vector to determine if it is cost effective to do this as
+ // discrete instructions, or whether we should use a vperm.
+ // For now, we skip this for little endian until such time as we have a
+ // little-endian perfect shuffle table.
+ if (isFourElementShuffle && !isLittleEndian) {
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex =
+ PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ // Determining when to avoid vperm is tricky. Many things affect the cost
+ // of vperm, particularly how many times the perm mask needs to be computed.
+ // For example, if the perm mask can be hoisted out of a loop or is already
+ // used (perhaps because there are multiple permutes with the same shuffle
+ // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
+ // the loop requires an extra register.
+ //
+ // As a compromise, we only emit discrete instructions if the shuffle can be
+ // generated in 3 or fewer operations. When we have loop information
+ // available, if this block is within a loop, we should avoid using vperm
+ // for 3-operation perms and use a constant pool load instead.
+ if (Cost < 3)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+ }
+
+ // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
+ // vector that will get spilled to the constant pool.
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
+ // that it is in input element units, not in bytes. Convert now.
+
+ // For little endian, the order of the input vectors is reversed, and
+ // the permutation mask is complemented with respect to 31. This is
+ // necessary to produce proper semantics with the big-endian-biased vperm
+ // instruction.
+ EVT EltVT = V1.getValueType().getVectorElementType();
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
+
+ for (unsigned j = 0; j != BytesPerElement; ++j)
+ if (isLittleEndian)
+ ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j),
+ MVT::i32));
+ else
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
+ MVT::i32));
+ }
+
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ ResultMask);
+ if (isLittleEndian)
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V2, V1, VPermMask);
+ else
+ return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
+ V1, V2, VPermMask);
+}
+
+/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
+/// altivec comparison. If it is, return true and fill in Opc/isDot with
+/// information about the intrinsic.
+static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
+ bool &isDot) {
+ unsigned IntrinsicID =
+ cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
+ CompareOpc = -1;
+ isDot = false;
+ switch (IntrinsicID) {
+ default: return false;
+ // Comparison predicates.
+ case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
+
+ // Normal Comparisons.
+ case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break;
+ case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break;
+ }
+ return true;
+}
+
+/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
+/// lower, do it, otherwise return null.
+SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ // If this is a lowered altivec predicate compare, CompareOpc is set to the
+ // opcode number of the comparison.
+ SDLoc dl(Op);
+ int CompareOpc;
+ bool isDot;
+ if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
+ return SDValue(); // Don't custom lower most intrinsics.
+
+ // If this is a non-dot comparison, make the VCMP node and we are done.
+ if (!isDot) {
+ SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(CompareOpc, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
+ }
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ Op.getOperand(2), // LHS
+ Op.getOperand(3), // RHS
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+
+ // Now that we have the comparison, emit a copy from the CR to a GPR.
+ // This is flagged to the above dot comparison.
+ SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ CompNode.getValue(1));
+
+ // Unpack the result based on how the target uses it.
+ unsigned BitNo; // Bit # of CR6.
+ bool InvertBit; // Invert result?
+ switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Return the value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = false;
+ break;
+ case 1: // Return the inverted value of the EQ bit of CR6.
+ BitNo = 0; InvertBit = true;
+ break;
+ case 2: // Return the value of the LT bit of CR6.
+ BitNo = 2; InvertBit = false;
+ break;
+ case 3: // Return the inverted value of the LT bit of CR6.
+ BitNo = 2; InvertBit = true;
+ break;
+ }
+
+ // Shift the bit into the low position.
+ Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
+ DAG.getConstant(8-(3-BitNo), MVT::i32));
+ // Isolate the bit.
+ Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+
+ // If we are supposed to, toggle the bit.
+ if (InvertBit)
+ Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
+ DAG.getConstant(1, MVT::i32));
+ return Flags;
+}
+
+SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // For v2i64 (VSX), we can pattern patch the v2i32 case (using fp <-> int
+ // instructions), but for smaller types, we need to first extend up to v2i32
+ // before doing going farther.
+ if (Op.getValueType() == MVT::v2i64) {
+ EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (ExtVT != MVT::v2i32) {
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
+ DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
+ ExtVT.getVectorElementType(), 4)));
+ Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
+ Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
+ DAG.getValueType(MVT::v2i32));
+ }
+
+ return Op;
+ }
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ // Create a stack slot that is 16-byte aligned.
+ MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ EVT PtrVT = getPointerTy();
+ SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
+
+ // Store the input value into Value#0 of the stack slot.
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
+ Op.getOperand(0), FIdx, MachinePointerInfo(),
+ false, false, 0);
+ // Load it out.
+ return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
+ false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ if (Op.getValueType() == MVT::v4i32) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
+ SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.
+
+ SDValue RHSSwap = // = vrlw RHS, 16
+ BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
+
+ // Shrinkify inputs to v8i16.
+ LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
+ RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
+ RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
+
+ // Low parts multiplied together, generating 32-bit results (we ignore the
+ // top parts).
+ SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
+ LHS, RHS, DAG, dl, MVT::v4i32);
+
+ SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
+ LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
+ // Shift the high parts up 16 bits.
+ HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
+ Neg16, DAG, dl);
+ return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
+ } else if (Op.getValueType() == MVT::v8i16) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+
+ SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);
+
+ return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
+ LHS, RHS, Zero, DAG, dl);
+ } else if (Op.getValueType() == MVT::v16i8) {
+ SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
+ bool isLittleEndian = Subtarget.isLittleEndian();
+
+ // Multiply the even 8-bit parts, producing 16-bit sums.
+ SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
+
+ // Multiply the odd 8-bit parts, producing 16-bit sums.
+ SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
+ LHS, RHS, DAG, dl, MVT::v8i16);
+ OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
+
+ // Merge the results together. Because vmuleub and vmuloub are
+ // instructions with a big-endian bias, we must reverse the
+ // element numbering and reverse the meaning of "odd" and "even"
+ // when generating little endian code.
+ int Ops[16];
+ for (unsigned i = 0; i != 8; ++i) {
+ if (isLittleEndian) {
+ Ops[i*2 ] = 2*i;
+ Ops[i*2+1] = 2*i+16;
+ } else {
+ Ops[i*2 ] = 2*i+1;
+ Ops[i*2+1] = 2*i+1+16;
+ }
+ }
+ if (isLittleEndian)
+ return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
+ else
+ return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
+ } else {
+ llvm_unreachable("Unknown mul to lower!");
+ }
+}
+
+/// LowerOperation - Provide custom lowering hooks for some operations.
+///
+SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default: llvm_unreachable("Wasn't expecting to be able to lower this!");
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
+ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
+ case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG, Subtarget);
+
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG, Subtarget);
+
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG, Subtarget);
+
+ case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget);
+ case ISD::DYNAMIC_STACKALLOC:
+ return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget);
+
+ case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
+
+ case ISD::LOAD: return LowerLOAD(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
+ case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
+ SDLoc(Op));
+ case ISD::UINT_TO_FP:
+ case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
+ case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
+
+ // Lower 64-bit shifts.
+ case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
+ case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
+ case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::MUL: return LowerMUL(Op, DAG);
+
+ // For counter-based loop handling.
+ case ISD::INTRINSIC_W_CHAIN: return SDValue();
+
+ // Frame & Return address.
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
+ }
+}
+
+void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const {
+ const TargetMachine &TM = getTargetMachine();
+ SDLoc dl(N);
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::INTRINSIC_W_CHAIN: {
+ if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
+ Intrinsic::ppc_is_decremented_ctr_nonzero)
+ break;
+
+ assert(N->getValueType(0) == MVT::i1 &&
+ "Unexpected result type for CTR decrement intrinsic");
+ EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
+ SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
+ N->getOperand(1));
+
+ Results.push_back(NewInt);
+ Results.push_back(NewInt.getValue(1));
+ break;
+ }
+ case ISD::VAARG: {
+ if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
+ return;
+ }
+ case ISD::FP_ROUND_INREG: {
+ assert(N->getValueType(0) == MVT::ppcf128);
+ assert(N->getOperand(0).getValueType() == MVT::ppcf128);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(0));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
+ MVT::f64, N->getOperand(0),
+ DAG.getIntPtrConstant(1));
+
+ // Add the two halves of the long double in round-to-zero mode.
+ SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+
+ // We know the low half is about to be thrown away, so just use something
+ // convenient.
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
+ FPreg, FPreg));
+ return;
+ }
+ case ISD::FP_TO_SINT:
+ // LowerFP_TO_INT() can only handle f32 and f64.
+ if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ return;
+ Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
+ return;
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ bool is64bit, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned TmpReg = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // l[wd]arx dest, ptr
+ // add r0, dest, incr
+ // st[wd]cx. r0, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ bool is8bit, // operation
+ unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ // In 64 bit mode we have to use 64 bits for addresses, even though the
+ // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
+ // registers without caring whether they're 32 or 64, but here we're
+ // doing actual arithmetic on the addresses.
+ bool is64bit = Subtarget.isPPC64();
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned incr = MI->getOperand(3).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw incr2, incr, shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // loopMBB:
+ // lwarx tmpDest, ptr
+ // add tmp, tmpDest, incr2
+ // andc tmp2, tmpDest, mask
+ // and tmp3, tmp, mask
+ // or tmp4, tmp3, tmp2
+ // stwcx. tmp4, ptr
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
+ .addReg(incr).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+
+ BB = loopMBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ if (BinOpcode)
+ BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
+ .addReg(Incr2Reg).addReg(TmpDestReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
+ .addReg(TmpReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
+ .addReg(Tmp3Reg).addReg(Tmp2Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX))
+ .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
+ .addReg(ShiftReg);
+ return BB;
+}
+
+llvm::MachineBasicBlock*
+PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ const BasicBlock *BB = MBB->getBasicBlock();
+ MachineFunction::iterator I = MBB;
+ ++I;
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ unsigned DstReg = MI->getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+ assert(RC->hasType(MVT::i32) && "Invalid destination!");
+ unsigned mainDstReg = MRI.createVirtualRegister(RC);
+ unsigned restoreDstReg = MRI.createVirtualRegister(RC);
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+ // For v = setjmp(buf), we generate
+ //
+ // thisMBB:
+ // SjLjSetup mainMBB
+ // bl mainMBB
+ // v_restore = 1
+ // b sinkMBB
+ //
+ // mainMBB:
+ // buf[LabelOffset] = LR
+ // v_main = 0
+ //
+ // sinkMBB:
+ // v = phi(main, restore)
+ //
+
+ MachineBasicBlock *thisMBB = MBB;
+ MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
+ MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
+ MF->insert(I, mainMBB);
+ MF->insert(I, sinkMBB);
+
+ MachineInstrBuilder MIB;
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ // Note that the structure of the jmp_buf used here is not compatible
+ // with that used by libc, and is not designed to be. Specifically, it
+ // stores only those 'reserved' registers that LLVM does not otherwise
+ // understand how to spill. Also, by convention, by the time this
+ // intrinsic is called, Clang has already stored the frame address in the
+ // first slot of the buffer and stack address in the third. Following the
+ // X86 target code, we'll store the jump address in the second slot. We also
+ // need to save the TOC pointer (R2) to handle jumps between shared
+ // libraries, and that will be stored in the fourth slot. The thread
+ // identifier (R13) is not affected.
+
+ // thisMBB:
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
+
+ // Prepare IP either in reg.
+ const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
+ unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
+ unsigned BufReg = MI->getOperand(1).getReg();
+
+ if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
+ .addReg(PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Naked functions never have a base pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned BaseReg;
+ if (MF->getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Setup
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
+ const PPCRegisterInfo *TRI =
+ static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+ MIB.addRegMask(TRI->getNoPreservedMask());
+
+ BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
+
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
+ .addMBB(mainMBB);
+ MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
+
+ thisMBB->addSuccessor(mainMBB, /* weight */ 0);
+ thisMBB->addSuccessor(sinkMBB, /* weight */ 1);
+
+ // mainMBB:
+ // mainDstReg = 0
+ MIB = BuildMI(mainMBB, DL,
+ TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
+
+ // Store IP
+ if (Subtarget.isPPC64()) {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
+ .addReg(LabelReg)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
+ mainMBB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(PPC::PHI), DstReg)
+ .addReg(mainDstReg).addMBB(mainMBB)
+ .addReg(restoreDstReg).addMBB(thisMBB);
+
+ MI->eraseFromParent();
+ return sinkMBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const {
+ DebugLoc DL = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // Memory Reference
+ MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+ MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+ MVT PVT = getPointerTy();
+ assert((PVT == MVT::i64 || PVT == MVT::i32) &&
+ "Invalid Pointer Size!");
+
+ const TargetRegisterClass *RC =
+ (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned Tmp = MRI.createVirtualRegister(RC);
+ // Since FP is only updated here but NOT referenced, it's treated as GPR.
+ unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
+ unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 :
+ (Subtarget.isSVR4ABI() &&
+ MF->getTarget().getRelocationModel() == Reloc::PIC_ ?
+ PPC::R29 : PPC::R30);
+
+ MachineInstrBuilder MIB;
+
+ const int64_t LabelOffset = 1 * PVT.getStoreSize();
+ const int64_t SPOffset = 2 * PVT.getStoreSize();
+ const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
+
+ unsigned BufReg = MI->getOperand(0).getReg();
+
+ // Reload FP (the jumped-to function may not have had a
+ // frame pointer, and if so, then its r31 will be restored
+ // as necessary).
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
+ .addImm(0)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload IP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
+ .addImm(LabelOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload SP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
+ .addImm(SPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload BP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
+ // Reload TOC
+ if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
+ .addImm(TOCOffset)
+ .addReg(BufReg);
+
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+ }
+
+ // Jump
+ BuildMI(*MBB, MI, DL,
+ TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
+ BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
+
+ MI->eraseFromParent();
+ return MBB;
+}
+
+MachineBasicBlock *
+PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
+ return emitEHSjLjSetJmp(MI, BB);
+ } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
+ MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
+ return emitEHSjLjLongJmp(MI, BB);
+ }
+
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ // To "insert" these instructions we actually have to insert their
+ // control-flow patterns.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ MachineFunction *F = BB->getParent();
+
+ if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8)) {
+ SmallVector<MachineOperand, 2> Cond;
+ if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8)
+ Cond.push_back(MI->getOperand(4));
+ else
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(MI->getOperand(1));
+
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+ Cond, MI->getOperand(2).getReg(),
+ MI->getOperand(3).getReg());
+ } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
+ MI->getOpcode() == PPC::SELECT_CC_I8 ||
+ MI->getOpcode() == PPC::SELECT_CC_F4 ||
+ MI->getOpcode() == PPC::SELECT_CC_F8 ||
+ MI->getOpcode() == PPC::SELECT_CC_VRRC ||
+ MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
+ // The incoming instruction knows the destination vreg to set, the
+ // condition code register to branch on, the true/false values to
+ // select between, and a branch opcode to use.
+
+ // thisMBB:
+ // ...
+ // TrueVal = ...
+ // cmpTY ccX, r1, r2
+ // bCC copy1MBB
+ // fallthrough --> copy0MBB
+ MachineBasicBlock *thisMBB = BB;
+ MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ DebugLoc dl = MI->getDebugLoc();
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
+ if (MI->getOpcode() == PPC::SELECT_I4 ||
+ MI->getOpcode() == PPC::SELECT_I8 ||
+ MI->getOpcode() == PPC::SELECT_F4 ||
+ MI->getOpcode() == PPC::SELECT_F8 ||
+ MI->getOpcode() == PPC::SELECT_VRRC) {
+ BuildMI(BB, dl, TII->get(PPC::BC))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ } else {
+ unsigned SelectPred = MI->getOperand(4).getImm();
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+ }
+
+ // copy0MBB:
+ // %FalseValue = ...
+ // # fallthrough to sinkMBB
+ BB = copy0MBB;
+
+ // Update machine-CFG edges
+ BB->addSuccessor(sinkMBB);
+
+ // sinkMBB:
+ // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
+ // ...
+ BB = sinkMBB;
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
+ .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
+ }
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::NAND);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
+ BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
+ else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
+ BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
+ BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
+ BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
+ BB = EmitAtomicBinary(MI, BB, false, 0);
+ else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
+ BB = EmitAtomicBinary(MI, BB, true, 0);
+
+ else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
+ bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // l[wd]arx dest, ptr
+ // cmp[wd] dest, oldval
+ // bne- midMBB
+ // loop2MBB:
+ // st[wd]cx. newval, ptr
+ // bne- loopMBB
+ // b exitBB
+ // midMBB:
+ // st[wd]cx. dest, ptr
+ // exitBB:
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
+ .addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
+ .addReg(oldval).addReg(dest);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(newval).addReg(ptrA).addReg(ptrB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
+ .addReg(dest).addReg(ptrA).addReg(ptrB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
+ MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
+ // We must use 64-bit registers for addresses when targeting 64-bit,
+ // since we're actually doing arithmetic on them. Other registers
+ // can be 32-bit.
+ bool is64bit = Subtarget.isPPC64();
+ bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptrA = MI->getOperand(1).getReg();
+ unsigned ptrB = MI->getOperand(2).getReg();
+ unsigned oldval = MI->getOperand(3).getReg();
+ unsigned newval = MI->getOperand(4).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop1MBB);
+ F->insert(It, loop2MBB);
+ F->insert(It, midMBB);
+ F->insert(It, exitMBB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ std::next(MachineBasicBlock::iterator(MI)), BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ const TargetRegisterClass *RC =
+ is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
+ (const TargetRegisterClass *) &PPC::GPRCRegClass;
+ unsigned PtrReg = RegInfo.createVirtualRegister(RC);
+ unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned MaskReg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
+ unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
+ unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
+ unsigned Ptr1Reg;
+ unsigned TmpReg = RegInfo.createVirtualRegister(RC);
+ unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loop1MBB);
+
+ // The 4-byte load must be aligned, while a char or short may be
+ // anywhere in the word. Hence all this nasty bookkeeping code.
+ // add ptr1, ptrA, ptrB [copy if ptrA==0]
+ // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
+ // xori shift, shift1, 24 [16]
+ // rlwinm ptr, ptr1, 0, 0, 29
+ // slw newval2, newval, shift
+ // slw oldval2, oldval,shift
+ // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
+ // slw mask, mask2, shift
+ // and newval3, newval2, mask
+ // and oldval3, oldval2, mask
+ // loop1MBB:
+ // lwarx tmpDest, ptr
+ // and tmp, tmpDest, mask
+ // cmpw tmp, oldval3
+ // bne- midMBB
+ // loop2MBB:
+ // andc tmp2, tmpDest, mask
+ // or tmp4, tmp2, newval3
+ // stwcx. tmp4, ptr
+ // bne- loop1MBB
+ // b exitBB
+ // midMBB:
+ // stwcx. tmpDest, ptr
+ // exitBB:
+ // srw dest, tmpDest, shift
+ if (ptrA != ZeroReg) {
+ Ptr1Reg = RegInfo.createVirtualRegister(RC);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
+ .addReg(ptrA).addReg(ptrB);
+ } else {
+ Ptr1Reg = ptrB;
+ }
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
+ .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (is64bit)
+ BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(61);
+ else
+ BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
+ .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
+ BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
+ .addReg(newval).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
+ .addReg(oldval).addReg(ShiftReg);
+ if (is8bit)
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
+ else {
+ BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
+ BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
+ .addReg(Mask3Reg).addImm(65535);
+ }
+ BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
+ .addReg(Mask2Reg).addReg(ShiftReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
+ .addReg(NewVal2Reg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
+ .addReg(OldVal2Reg).addReg(MaskReg);
+
+ BB = loop1MBB;
+ BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
+ .addReg(TmpReg).addReg(OldVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(midMBB);
+
+ BB = loop2MBB;
+ BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
+ .addReg(TmpDestReg).addReg(MaskReg);
+ BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
+ .addReg(Tmp2Reg).addReg(NewVal3Reg);
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
+ BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ BB = midMBB;
+ BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
+ .addReg(ZeroReg).addReg(PtrReg);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
+ .addReg(ShiftReg);
+ } else if (MI->getOpcode() == PPC::FADDrtz) {
+ // This pseudo performs an FADD with rounding mode temporarily forced
+ // to round-to-zero. We emit this via custom inserter since the FPSCR
+ // is not modeled at the SelectionDAG level.
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ unsigned Src2 = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
+
+ // Save FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
+
+ // Set rounding mode to round-to-zero.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
+
+ // Perform addition.
+ BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+
+ // Restore FPSCR value.
+ BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
+ } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) {
+ unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
+ MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ?
+ PPC::ANDIo8 : PPC::ANDIo;
+ bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT ||
+ MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ?
+ &PPC::GPRCRegClass :
+ &PPC::G8RCRegClass);
+
+ DebugLoc dl = MI->getDebugLoc();
+ BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
+ .addReg(MI->getOperand(1).getReg()).addImm(1);
+ BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
+ MI->getOperand(0).getReg())
+ .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
+ } else {
+ llvm_unreachable("Unexpected instr type to insert");
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal, we need to find the zero of the function:
+ // F(X) = A X - 1 [which has a zero at X = 1/A]
+ // =>
+ // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+ // does not require additional intermediate precision]
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(Op);
+
+ SDValue FPOne =
+ DAG.getConstantFP(1.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPOne, FPOne, FPOne, FPOne);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ EVT VT = Op.getValueType();
+
+ if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
+ (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
+ (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
+ (VT == MVT::v2f64 && Subtarget.hasVSX())) {
+
+ // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
+ // For the reciprocal sqrt, we need to find the zero of the function:
+ // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
+ // =>
+ // X_{i+1} = X_i (1.5 - A X_i^2 / 2)
+ // As a result, we precompute A/2 prior to the iteration loop.
+
+ // Convergence is quadratic, so we essentially double the number of digits
+ // correct after every iteration. The minimum architected relative
+ // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
+ // 23 digits and double has 52 digits.
+ int Iterations = Subtarget.hasRecipPrec() ? 1 : 3;
+ if (VT.getScalarType() == MVT::f64)
+ ++Iterations;
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(Op);
+
+ SDValue FPThreeHalves =
+ DAG.getConstantFP(1.5, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 &&
+ "Unknown vector type");
+ FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
+ FPThreeHalves, FPThreeHalves,
+ FPThreeHalves, FPThreeHalves);
+ }
+
+ SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
+ DCI.AddToWorklist(Est.getNode());
+
+ // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
+ // this entire sequence requires only one FP constant.
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
+ DCI.AddToWorklist(HalfArg.getNode());
+
+ // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
+ for (int i = 0; i < Iterations; ++i) {
+ SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
+ DCI.AddToWorklist(NewEst.getNode());
+
+ Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
+ DCI.AddToWorklist(Est.getNode());
+ }
+
+ return Est;
+ }
+
+ return SDValue();
+}
+
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ EVT VT = LS->getMemoryVT();
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LS->getBasePtr();
+ SDValue BaseLoc = Base->getBasePtr();
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+// Return true is there is a nearyby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking though
+// token factors and other loads (but nothing else). As a result, a true
+// results indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+ SDValue Chain = LD->getChain();
+ EVT VT = LD->getMemoryVT();
+
+ SmallSet<SDNode *, 16> LoadRoots;
+ SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+ SmallSet<SDNode *, 16> Visited;
+
+ // First, search up the chain, branching to follow all token-factor operands.
+ // If we find a consecutive load, then we're done, otherwise, record all
+ // nodes just above the top-level loads and token factors.
+ while (!Queue.empty()) {
+ SDNode *ChainNext = Queue.pop_back_val();
+ if (!Visited.insert(ChainNext))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ if (!Visited.count(ChainLD->getChain().getNode()))
+ Queue.push_back(ChainLD->getChain().getNode());
+ } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+ for (const SDUse &O : ChainNext->ops())
+ if (!Visited.count(O.getNode()))
+ Queue.push_back(O.getNode());
+ } else
+ LoadRoots.insert(ChainNext);
+ }
+
+ // Second, search down the chain, starting from the top-level nodes recorded
+ // in the first phase. These top-level nodes are the nodes just above all
+ // loads and token factors. Starting with their uses, recursively look though
+ // all loads (just the chain uses) and token factors to find a consecutive
+ // load.
+ Visited.clear();
+ Queue.clear();
+
+ for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+ IE = LoadRoots.end(); I != IE; ++I) {
+ Queue.push_back(*I);
+
+ while (!Queue.empty()) {
+ SDNode *LoadRoot = Queue.pop_back_val();
+ if (!Visited.insert(LoadRoot))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+ UE = LoadRoot->use_end(); UI != UE; ++UI)
+ if (((isa<LoadSDNode>(*UI) &&
+ cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+ Queue.push_back(*UI);
+ }
+ }
+
+ return false;
+}
+
+SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ assert(Subtarget.useCRBits() &&
+ "Expecting to be tracking CR bits");
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // trunc(binary-ops(zext(x), zext(y)))
+ // or
+ // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
+ // such that we're unnecessarily moving things into GPRs when it would be
+ // better to keep them in CR bits.
+
+ // Note that trunc here can be an actual i1 trunc, or can be the effective
+ // truncation that comes from a setcc or select_cc.
+ if (N->getOpcode() == ISD::TRUNCATE &&
+ N->getValueType(0) != MVT::i1)
+ return SDValue();
+
+ if (N->getOperand(0).getValueType() != MVT::i32 &&
+ N->getOperand(0).getValueType() != MVT::i64)
+ return SDValue();
+
+ if (N->getOpcode() == ISD::SETCC ||
+ N->getOpcode() == ISD::SELECT_CC) {
+ // If we're looking at a comparison, then we need to make sure that the
+ // high bits (all except for the first) don't matter the result.
+ ISD::CondCode CC =
+ cast<CondCodeSDNode>(N->getOperand(
+ N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
+ unsigned OpBits = N->getOperand(0).getValueSizeInBits();
+
+ if (ISD::isSignedIntSetCC(CC)) {
+ if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
+ DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
+ return SDValue();
+ } else if (ISD::isUnsignedIntSetCC(CC)) {
+ if (!DAG.MaskedValueIsZero(N->getOperand(0),
+ APInt::getHighBitsSet(OpBits, OpBits-1)) ||
+ !DAG.MaskedValueIsZero(N->getOperand(1),
+ APInt::getHighBitsSet(OpBits, OpBits-1)))
+ return SDValue();
+ } else {
+ // This is neither a signed nor an unsigned comparison, just make sure
+ // that the high bits are equal.
+ APInt Op1Zero, Op1One;
+ APInt Op2Zero, Op2One;
+ DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
+ DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
+
+ // We don't really care about what is known about the first bit (if
+ // anything), so clear it in all masks prior to comparing them.
+ Op1Zero.clearBit(0); Op1One.clearBit(0);
+ Op2Zero.clearBit(0); Op2One.clearBit(0);
+
+ if (Op1Zero != Op2Zero || Op1One != Op2One)
+ return SDValue();
+ }
+ }
+
+ // We now know that the higher-order bits are irrelevant, we just need to
+ // make sure that all of the intermediate operations are bit operations, and
+ // all inputs are extensions.
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
+ N->getOperand(1).getOpcode() != ISD::AND &&
+ N->getOperand(1).getOpcode() != ISD::OR &&
+ N->getOperand(1).getOpcode() != ISD::XOR &&
+ N->getOperand(1).getOpcode() != ISD::SELECT &&
+ N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
+ N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
+ N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
+ N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps, PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ for (unsigned i = 0; i < 2; ++i) {
+ if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(N->getOperand(i)))
+ Inputs.push_back(N->getOperand(i));
+ else
+ BinOps.push_back(N->getOperand(i));
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ break;
+ }
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by extensions.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
+ BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
+ BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
+ BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not an extension or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ // Replace all inputs with the extension operand.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constants may have users outside the cluster of to-be-promoted nodes,
+ // and so we need to replace those as we do the promotions.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (i1) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first. Any intermediate truncations or
+ // extensions disappear.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ if (PromOp.getOpcode() == ISD::TRUNCATE ||
+ PromOp.getOpcode() == ISD::SIGN_EXTEND ||
+ PromOp.getOpcode() == ISD::ZERO_EXTEND ||
+ PromOp.getOpcode() == ISD::ANY_EXTEND) {
+ if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
+ PromOp.getOperand(0).getValueType() != MVT::i1) {
+ // The operand is not yet ready (see comment below).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SDValue RepValue = PromOp.getOperand(0);
+ if (isa<ConstantSDNode>(RepValue))
+ RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
+ continue;
+ }
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != MVT::i1) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If there are any constant inputs, make sure they're replaced now.
+ for (unsigned i = 0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[C+i]))
+ Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
+ }
+
+ // Now we're left with the initial truncation itself.
+ if (N->getOpcode() == ISD::TRUNCATE)
+ return N->getOperand(0);
+
+ // Otherwise, this is a comparison. The operands to be compared have just
+ // changed type (to i1), but everything else is the same.
+ return SDValue(N, 0);
+}
+
+SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ // If we're tracking CR bits, we need to be careful that we don't have:
+ // zext(binary-ops(trunc(x), trunc(y)))
+ // or
+ // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
+ // such that we're unnecessarily moving things into CR bits that can more
+ // efficiently stay in GPRs. Note that if we're not certain that the high
+ // bits are set as required by the final extension, we still may need to do
+ // some masking to get the proper behavior.
+
+ // This same functionality is important on PPC64 when dealing with
+ // 32-to-64-bit extensions; these occur often when 32-bit values are used as
+ // the return values of functions. Because it is so similar, it is handled
+ // here as well.
+
+ if (N->getValueType(0) != MVT::i32 &&
+ N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ if (!((N->getOperand(0).getValueType() == MVT::i1 &&
+ Subtarget.useCRBits()) ||
+ (N->getOperand(0).getValueType() == MVT::i32 &&
+ Subtarget.isPPC64())))
+ return SDValue();
+
+ if (N->getOperand(0).getOpcode() != ISD::AND &&
+ N->getOperand(0).getOpcode() != ISD::OR &&
+ N->getOperand(0).getOpcode() != ISD::XOR &&
+ N->getOperand(0).getOpcode() != ISD::SELECT &&
+ N->getOperand(0).getOpcode() != ISD::SELECT_CC)
+ return SDValue();
+
+ SmallVector<SDValue, 4> Inputs;
+ SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
+ SmallPtrSet<SDNode *, 16> Visited;
+
+ // Visit all inputs, collect all binary operations (and, or, xor and
+ // select) that are all fed by truncations.
+ while (!BinOps.empty()) {
+ SDValue BinOp = BinOps.back();
+ BinOps.pop_back();
+
+ if (!Visited.insert(BinOp.getNode()))
+ continue;
+
+ PromOps.push_back(BinOp);
+
+ for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
+ // The condition of the select is not promoted.
+ if (BinOp.getOpcode() == ISD::SELECT && i == 0)
+ continue;
+ if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
+ continue;
+
+ if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
+ isa<ConstantSDNode>(BinOp.getOperand(i))) {
+ Inputs.push_back(BinOp.getOperand(i));
+ } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
+ BinOp.getOperand(i).getOpcode() == ISD::OR ||
+ BinOp.getOperand(i).getOpcode() == ISD::XOR ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
+ BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
+ BinOps.push_back(BinOp.getOperand(i));
+ } else {
+ // We have an input that is not a truncation or another binary
+ // operation; we'll abort this transformation.
+ return SDValue();
+ }
+ }
+ }
+
+ // Make sure that this is a self-contained cluster of operations (which
+ // is not quite the same thing as saying that everything has only one
+ // use).
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
+ UE = Inputs[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == Inputs[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == Inputs[i] ||
+ User->getOperand(1) == Inputs[i])
+ return SDValue();
+ }
+ }
+ }
+
+ for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
+ for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
+ UE = PromOps[i].getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User != N && !Visited.count(User))
+ return SDValue();
+
+ // Make sure that we're not going to promote the non-output-value
+ // operand(s) or SELECT or SELECT_CC.
+ // FIXME: Although we could sometimes handle this, and it does occur in
+ // practice that one of the condition inputs to the select is also one of
+ // the outputs, we currently can't deal with this.
+ if (User->getOpcode() == ISD::SELECT) {
+ if (User->getOperand(0) == PromOps[i])
+ return SDValue();
+ } else if (User->getOpcode() == ISD::SELECT_CC) {
+ if (User->getOperand(0) == PromOps[i] ||
+ User->getOperand(1) == PromOps[i])
+ return SDValue();
+ }
+ }
+ }
+
+ unsigned PromBits = N->getOperand(0).getValueSizeInBits();
+ bool ReallyNeedsExt = false;
+ if (N->getOpcode() != ISD::ANY_EXTEND) {
+ // If all of the inputs are not already sign/zero extended, then
+ // we'll still need to do that at the end.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ unsigned OpBits =
+ Inputs[i].getOperand(0).getValueSizeInBits();
+ assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
+
+ if ((N->getOpcode() == ISD::ZERO_EXTEND &&
+ !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
+ APInt::getHighBitsSet(OpBits,
+ OpBits-PromBits))) ||
+ (N->getOpcode() == ISD::SIGN_EXTEND &&
+ DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
+ (OpBits-(PromBits-1)))) {
+ ReallyNeedsExt = true;
+ break;
+ }
+ }
+ }
+
+ // Replace all inputs, either with the truncation operand, or a
+ // truncation or extension to the final output type.
+ for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
+ // Constant inputs need to be replaced with the to-be-promoted nodes that
+ // use them because they might have users outside of the cluster of
+ // promoted nodes.
+ if (isa<ConstantSDNode>(Inputs[i]))
+ continue;
+
+ SDValue InSrc = Inputs[i].getOperand(0);
+ if (Inputs[i].getValueType() == N->getValueType(0))
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
+ else if (N->getOpcode() == ISD::SIGN_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ else
+ DAG.ReplaceAllUsesOfValueWith(Inputs[i],
+ DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
+ }
+
+ // Replace all operations (these are all the same, but have a different
+ // (promoted) return type). DAG.getNode will validate that the types of
+ // a binary operator match, so go through the list in reverse so that
+ // we've likely promoted both operands first.
+ while (!PromOps.empty()) {
+ SDValue PromOp = PromOps.back();
+ PromOps.pop_back();
+
+ unsigned C;
+ switch (PromOp.getOpcode()) {
+ default: C = 0; break;
+ case ISD::SELECT: C = 1; break;
+ case ISD::SELECT_CC: C = 2; break;
+ }
+
+ if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
+ PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
+ (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
+ PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
+ // The to-be-promoted operands of this node have not yet been
+ // promoted (this should be rare because we're going through the
+ // list backward, but if one of the operands has several users in
+ // this cluster of to-be-promoted nodes, it is possible).
+ PromOps.insert(PromOps.begin(), PromOp);
+ continue;
+ }
+
+ SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
+ PromOp.getNode()->op_end());
+
+ // If this node has constant inputs, then they'll need to be promoted here.
+ for (unsigned i = 0; i < 2; ++i) {
+ if (!isa<ConstantSDNode>(Ops[C+i]))
+ continue;
+ if (Ops[C+i].getValueType() == N->getValueType(0))
+ continue;
+
+ if (N->getOpcode() == ISD::SIGN_EXTEND)
+ Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else if (N->getOpcode() == ISD::ZERO_EXTEND)
+ Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ else
+ Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
+ }
+
+ DAG.ReplaceAllUsesOfValueWith(PromOp,
+ DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
+ }
+
+ // Now we're left with the initial extension itself.
+ if (!ReallyNeedsExt)
+ return N->getOperand(0);
+
+ // To zero extend, just mask off everything except for the first bit (in the
+ // i1 case).
+ if (N->getOpcode() == ISD::ZERO_EXTEND)
+ return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
+ DAG.getConstant(APInt::getLowBitsSet(
+ N->getValueSizeInBits(0), PromBits),
+ N->getValueType(0)));
+
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ "Invalid extension type");
+ EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
+ SDValue ShiftCst =
+ DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
+ return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
+ DAG.getNode(ISD::SHL, dl, N->getValueType(0),
+ N->getOperand(0), ShiftCst), ShiftCst);
+}
+
+SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ const TargetMachine &TM = getTargetMachine();
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+ switch (N->getOpcode()) {
+ default: break;
+ case PPCISD::SHL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 << V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRL:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue()) // 0 >>u V -> 0.
+ return N->getOperand(0);
+ }
+ break;
+ case PPCISD::SRA:
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (C->isNullValue() || // 0 >>s V -> 0.
+ C->isAllOnesValue()) // -1 >>s V -> -1.
+ return N->getOperand(0);
+ }
+ break;
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ return DAGCombineExtBoolTrunc(N, DCI);
+ case ISD::TRUNCATE:
+ case ISD::SETCC:
+ case ISD::SELECT_CC:
+ return DAGCombineTruncBoolExt(N, DCI);
+ case ISD::FDIV: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
+ if (RV.getNode()) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode()) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
+ N->getValueType(0), RV);
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
+ N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
+ SDValue RV =
+ DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
+ DCI);
+ if (RV.getNode()) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
+ N->getValueType(0), RV,
+ N->getOperand(1).getOperand(1));
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+ }
+
+ SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
+ if (RV.getNode()) {
+ DCI.AddToWorklist(RV.getNode());
+ return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
+ N->getOperand(0), RV);
+ }
+
+ }
+ break;
+ case ISD::FSQRT: {
+ assert(TM.Options.UnsafeFPMath &&
+ "Reciprocal estimates require UnsafeFPMath");
+
+ // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
+ // reciprocal sqrt.
+ SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
+ if (RV.getNode()) {
+ DCI.AddToWorklist(RV.getNode());
+ RV = DAGCombineFastRecip(RV, DCI);
+ if (RV.getNode()) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
+
+ EVT VT = RV.getValueType();
+
+ SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+ Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+ }
+
+ SDValue ZeroCmp =
+ DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ DCI.AddToWorklist(ZeroCmp.getNode());
+ DCI.AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+ ZeroCmp, Zero, RV);
+ return RV;
+ }
+ }
+
+ }
+ break;
+ case ISD::SINT_TO_FP:
+ if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
+ if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
+ // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
+ // We allow the src/dst to be either f32/f64, but the intermediate
+ // type must be i64.
+ if (N->getOperand(0).getValueType() == MVT::i64 &&
+ N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(0).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+
+ Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ if (N->getValueType(0) == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
+ DAG.getIntPtrConstant(0));
+ DCI.AddToWorklist(Val.getNode());
+ }
+ return Val;
+ } else if (N->getOperand(0).getValueType() == MVT::i32) {
+ // If the intermediate type is i32, we can avoid the load/store here
+ // too.
+ }
+ }
+ }
+ break;
+ case ISD::STORE:
+ // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
+ if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
+ !cast<StoreSDNode>(N)->isTruncatingStore() &&
+ N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
+ N->getOperand(1).getValueType() == MVT::i32 &&
+ N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
+ SDValue Val = N->getOperand(1).getOperand(0);
+ if (Val.getValueType() == MVT::f32) {
+ Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+ }
+ Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
+ DCI.AddToWorklist(Val.getNode());
+
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops,
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ DCI.AddToWorklist(Val.getNode());
+ return Val;
+ }
+
+ // Turn STORE (BSWAP) -> sthbrx/stwbrx.
+ if (cast<StoreSDNode>(N)->isUnindexed() &&
+ N->getOperand(1).getOpcode() == ISD::BSWAP &&
+ N->getOperand(1).getNode()->hasOneUse() &&
+ (N->getOperand(1).getValueType() == MVT::i32 ||
+ N->getOperand(1).getValueType() == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getOperand(1).getValueType() == MVT::i64))) {
+ SDValue BSwapOp = N->getOperand(1).getOperand(0);
+ // Do an any-extend to 32-bits if this is a half-word input.
+ if (BSwapOp.getValueType() == MVT::i16)
+ BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
+
+ SDValue Ops[] = {
+ N->getOperand(0), BSwapOp, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ return
+ DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
+ Ops, cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
+ break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT VT = LD->getValueType(0);
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+ if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
+ TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ LD->getAlignment() < ABIAlignment) {
+ // This is a type-legal unaligned Altivec load.
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ bool isLittleEndian = Subtarget.isLittleEndian();
+
+ // This implements the loading of unaligned vectors as described in
+ // the venerable Apple Velocity Engine overview. Specifically:
+ // https://developer.apple.com/hardwaredrivers/ve/alignment.html
+ // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
+ //
+ // The general idea is to expand a sequence of one or more unaligned
+ // loads into an alignment-based permutation-control instruction (lvsl
+ // or lvsr), a series of regular vector loads (which always truncate
+ // their input address to an aligned address), and a series of
+ // permutations. The results of these permutations are the requested
+ // loaded values. The trick is that the last "extra" load is not taken
+ // from the address you might suspect (sizeof(vector) bytes after the
+ // last requested load), but rather sizeof(vector) - 1 bytes after the
+ // last requested vector. The point of this is to avoid a page fault if
+ // the base address happened to be aligned. This works because if the
+ // base address is aligned, then adding less than a full vector length
+ // will cause the last vector in the sequence to be (re)loaded.
+ // Otherwise, the next vector will be fetched as you might suspect was
+ // necessary.
+
+ // We might be able to reuse the permutation generation from
+ // a different base address offset from this one by an aligned amount.
+ // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+ // optimization later.
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);
+
+ // Refine the alignment of the original load (a "new" load created here
+ // which was identical to the first except for the alignment would be
+ // merged with the existing node regardless).
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(LD->getPointerInfo(),
+ LD->getMemOperand()->getFlags(),
+ LD->getMemoryVT().getStoreSize(),
+ ABIAlignment);
+ LD->refineAlignment(MMO);
+ SDValue BaseLoad = SDValue(LD, 0);
+
+ // Note that the value of IncOffset (which is provided to the next
+ // load's pointer info offset value, and thus used to calculate the
+ // alignment), and the value of IncValue (which is actually used to
+ // increment the pointer value) are different! This is because we
+ // require the next load to appear to be aligned, even though it
+ // is actually offset from the base pointer by a lesser amount.
+ int IncOffset = VT.getSizeInBits() / 8;
+ int IncValue = IncOffset;
+
+ // Walk (both up and down) the chain looking for another load at the real
+ // (aligned) offset (the alignment of the other load does not matter in
+ // this case). If found, then do not use the offset reduction trick, as
+ // that will prevent the loads from being later combined (as they would
+ // otherwise be duplicates).
+ if (!findConsecutiveLoad(LD, DAG))
+ --IncValue;
+
+ SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+
+ SDValue ExtraLoad =
+ DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncOffset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), ABIAlignment);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ BaseLoad.getValue(1), ExtraLoad.getValue(1));
+
+ if (BaseLoad.getValueType() != MVT::v4i32)
+ BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
+
+ if (ExtraLoad.getValueType() != MVT::v4i32)
+ ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
+
+ // Because vperm has a big-endian bias, we must reverse the order
+ // of the input vectors and complement the permute control vector
+ // when generating little endian code. We have already handled the
+ // latter by using lvsr instead of lvsl, so just reverse BaseLoad
+ // and ExtraLoad here.
+ SDValue Perm;
+ if (isLittleEndian)
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ ExtraLoad, BaseLoad, PermCntl, DAG, dl);
+ else
+ Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+
+ if (VT != MVT::v4i32)
+ Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+
+ // Now we need to be really careful about how we update the users of the
+ // original load. We cannot just call DCI.CombineTo (or
+ // DAG.ReplaceAllUsesWith for that matter), because the load still has
+ // uses created here (the permutation for example) that need to stay.
+ SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ while (UI != UE) {
+ SDUse &Use = UI.getUse();
+ SDNode *User = *UI;
+ // Note: BaseLoad is checked here because it might not be N, but a
+ // bitcast of N.
+ if (User == Perm.getNode() || User == BaseLoad.getNode() ||
+ User == TF.getNode() || Use.getResNo() > 1) {
+ ++UI;
+ continue;
+ }
+
+ SDValue To = Use.getResNo() ? TF : Perm;
+ ++UI;
+
+ SmallVector<SDValue, 8> Ops;
+ for (const SDUse &O : User->ops()) {
+ if (O == Use)
+ Ops.push_back(To);
+ else
+ Ops.push_back(O);
+ }
+
+ DAG.UpdateNodeOperands(User, Ops);
+ }
+
+ return SDValue(N, 0);
+ }
+ }
+ break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ bool isLittleEndian = Subtarget.isLittleEndian();
+ Intrinsic::ID Intr = (isLittleEndian ?
+ Intrinsic::ppc_altivec_lvsr :
+ Intrinsic::ppc_altivec_lvsl);
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
+ N->getOperand(1)->getOpcode() == ISD::ADD) {
+ SDValue Add = N->getOperand(1);
+
+ if (DAG.MaskedValueIsZero(Add->getOperand(1),
+ APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
+ Add.getValueType().getScalarType().getSizeInBits()))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+ Intr) {
+ // We've found another LVSL/LVSR, and this address is an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
+
+ return SDValue(*UI, 0);
+ }
+ }
+ }
+ }
+ }
+
+ break;
+ case ISD::BSWAP:
+ // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
+ (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
+ TM.getSubtarget<PPCSubtarget>().isPPC64() &&
+ N->getValueType(0) == MVT::i64))) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+ // Create the byte-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr(), // Ptr
+ DAG.getValueType(N->getValueType(0)) // VT
+ };
+ SDValue BSLoad =
+ DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
+ DAG.getVTList(N->getValueType(0) == MVT::i64 ?
+ MVT::i64 : MVT::i32, MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+ // If this is an i16 load, insert the truncate.
+ SDValue ResVal = BSLoad;
+ if (N->getValueType(0) == MVT::i16)
+ ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
+
+ // First, combine the bswap away. This makes the value produced by the
+ // load dead.
+ DCI.CombineTo(N, ResVal);
+
+ // Next, combine the load away, we give it a bogus result value but a real
+ // chain result. The result value is dead because the bswap is dead.
+ DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+
+ break;
+ case PPCISD::VCMP: {
+ // If a VCMPo node already exists with exactly the same operands as this
+ // node, use its result instead of this node (VCMPo computes both a CR6 and
+ // a normal output).
+ //
+ if (!N->getOperand(0).hasOneUse() &&
+ !N->getOperand(1).hasOneUse() &&
+ !N->getOperand(2).hasOneUse()) {
+
+ // Scan all of the users of the LHS, looking for VCMPo's that match.
+ SDNode *VCMPoNode = nullptr;
+
+ SDNode *LHSN = N->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
+ UI != E; ++UI)
+ if (UI->getOpcode() == PPCISD::VCMPo &&
+ UI->getOperand(1) == N->getOperand(1) &&
+ UI->getOperand(2) == N->getOperand(2) &&
+ UI->getOperand(0) == N->getOperand(0)) {
+ VCMPoNode = *UI;
+ break;
+ }
+
+ // If there is no VCMPo node, or if the flag value has a single use, don't
+ // transform this.
+ if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
+ break;
+
+ // Look at the (necessarily single) use of the flag value. If it has a
+ // chain, this transformation is more complex. Note that multiple things
+ // could use the value result, which we should ignore.
+ SDNode *FlagUser = nullptr;
+ for (SDNode::use_iterator UI = VCMPoNode->use_begin();
+ FlagUser == nullptr; ++UI) {
+ assert(UI != VCMPoNode->use_end() && "Didn't find user!");
+ SDNode *User = *UI;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
+ FlagUser = User;
+ break;
+ }
+ }
+ }
+
+ // If the user is a MFOCRF instruction, we know this is safe.
+ // Otherwise we give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFOCRF)
+ return SDValue(VCMPoNode, 0);
+ }
+ break;
+ }
+ case ISD::BRCOND: {
+ SDValue Cond = N->getOperand(1);
+ SDValue Target = N->getOperand(2);
+
+ if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero) {
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
+ assert(Cond.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
+ N->getOperand(0), Target);
+ }
+ }
+ break;
+ case ISD::BR_CC: {
+ // If this is a branch on an altivec predicate comparison, lower this so
+ // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
+ // lowering is done pre-legalize, because the legalizer lowers the predicate
+ // compare down to code that is difficult to reassemble.
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
+ SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+
+ // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
+ // value. If so, pass-through the AND to get to the intrinsic.
+ if (LHS.getOpcode() == ISD::AND &&
+ LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
+ isZero())
+ LHS = LHS.getOperand(0);
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(RHS)) {
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ "Counter decrement comparison is not EQ or NE");
+
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ bool isBDNZ = (CC == ISD::SETEQ && Val) ||
+ (CC == ISD::SETNE && !Val);
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
+ assert(LHS.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ int CompareOpc;
+ bool isDot;
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
+ assert(isDot && "Can't compare against a vector result!");
+
+ // If this is a comparison against something other than 0/1, then we know
+ // that the condition is never/always true.
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ if (Val != 0 && Val != 1) {
+ if (CC == ISD::SETEQ) // Cond never true, remove branch.
+ return N->getOperand(0);
+ // Always !=, turn it into an unconditional branch.
+ return DAG.getNode(ISD::BR, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
+ bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
+
+ // Create the PPCISD altivec 'dot' comparison node.
+ SDValue Ops[] = {
+ LHS.getOperand(2), // LHS of compare
+ LHS.getOperand(3), // RHS of compare
+ DAG.getConstant(CompareOpc, MVT::i32)
+ };
+ EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
+
+ // Unpack the result based on how the target uses it.
+ PPC::Predicate CompOpc;
+ switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
+ default: // Can't happen, don't crash on invalid number though.
+ case 0: // Branch on the value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
+ break;
+ case 1: // Branch on the inverted value of the EQ bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
+ break;
+ case 2: // Branch on the value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
+ break;
+ case 3: // Branch on the inverted value of the LT bit of CR6.
+ CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
+ break;
+ }
+
+ return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
+ DAG.getConstant(CompOpc, MVT::i32),
+ DAG.getRegister(PPC::CR6, MVT::i32),
+ N->getOperand(4), CompNode.getValue(1));
+ }
+ break;
+ }
+ }
+
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
+ switch (Op.getOpcode()) {
+ default: break;
+ case PPCISD::LBRX: {
+ // lhbrx is known to have the top bits cleared out.
+ if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
+ KnownZero = 0xFFFF0000;
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
+ default: break;
+ case Intrinsic::ppc_altivec_vcmpbfp_p:
+ case Intrinsic::ppc_altivec_vcmpeqfp_p:
+ case Intrinsic::ppc_altivec_vcmpequb_p:
+ case Intrinsic::ppc_altivec_vcmpequh_p:
+ case Intrinsic::ppc_altivec_vcmpequw_p:
+ case Intrinsic::ppc_altivec_vcmpgefp_p:
+ case Intrinsic::ppc_altivec_vcmpgtfp_p:
+ case Intrinsic::ppc_altivec_vcmpgtsb_p:
+ case Intrinsic::ppc_altivec_vcmpgtsh_p:
+ case Intrinsic::ppc_altivec_vcmpgtsw_p:
+ case Intrinsic::ppc_altivec_vcmpgtub_p:
+ case Intrinsic::ppc_altivec_vcmpgtuh_p:
+ case Intrinsic::ppc_altivec_vcmpgtuw_p:
+ KnownZero = ~1U; // All bits but the low one are known to be zero.
+ break;
+ }
+ }
+ }
+}
+
+
+/// getConstraintType - Given a constraint, return the type of
+/// constraint it is for this target.
+PPCTargetLowering::ConstraintType
+PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ case 'Z':
+ // FIXME: While Z does indicate a memory constraint, it specifically
+ // indicates an r+r address (used in conjunction with the 'y' modifier
+ // in the replacement string). Currently, we're forcing the base
+ // register to be r0 in the asm printer (which is interpreted as zero)
+ // and forming the complete address in the second register. This is
+ // suboptimal.
+ return C_Memory;
+ }
+ } else if (Constraint == "wc") { // individual CR bits.
+ return C_RegisterClass;
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf" || Constraint == "ws") {
+ return C_RegisterClass; // VSX registers.
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+PPCTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+
+ // Look at the constraint type.
+ if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
+ return CW_Register; // an individual CR bit.
+ else if ((StringRef(constraint) == "wa" ||
+ StringRef(constraint) == "wd" ||
+ StringRef(constraint) == "wf") &&
+ type->isVectorTy())
+ return CW_Register;
+ else if (StringRef(constraint) == "ws" && type->isDoubleTy())
+ return CW_Register;
+
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'b':
+ if (type->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'f':
+ if (type->isFloatTy())
+ weight = CW_Register;
+ break;
+ case 'd':
+ if (type->isDoubleTy())
+ weight = CW_Register;
+ break;
+ case 'v':
+ if (type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'y':
+ weight = CW_Register;
+ break;
+ case 'Z':
+ weight = CW_Memory;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const {
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ if (VT == MVT::i64 && Subtarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
+ return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
+ case 'r': // R0-R31
+ if (VT == MVT::i64 && Subtarget.isPPC64())
+ return std::make_pair(0U, &PPC::G8RCRegClass);
+ return std::make_pair(0U, &PPC::GPRCRegClass);
+ case 'f':
+ if (VT == MVT::f32 || VT == MVT::i32)
+ return std::make_pair(0U, &PPC::F4RCRegClass);
+ if (VT == MVT::f64 || VT == MVT::i64)
+ return std::make_pair(0U, &PPC::F8RCRegClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, &PPC::VRRCRegClass);
+ case 'y': // crrc
+ return std::make_pair(0U, &PPC::CRRCRegClass);
+ }
+ } else if (Constraint == "wc") { // an individual CR bit.
+ return std::make_pair(0U, &PPC::CRBITRCRegClass);
+ } else if (Constraint == "wa" || Constraint == "wd" ||
+ Constraint == "wf") {
+ return std::make_pair(0U, &PPC::VSRCRegClass);
+ } else if (Constraint == "ws") {
+ return std::make_pair(0U, &PPC::VSFRCRegClass);
+ }
+
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+ // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+ // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+ // register.
+ // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+ // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+ if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
+ PPC::GPRCRegClass.contains(R.first)) {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ return std::make_pair(TRI->getMatchingSuperReg(R.first,
+ PPC::sub_32, &PPC::G8RCRegClass),
+ &PPC::G8RCRegClass);
+ }
+
+ return R;
+}
+
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue>&Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+
+ // Only support length 1 constraints.
+ if (Constraint.length() > 1) return;
+
+ char Letter = Constraint[0];
+ switch (Letter) {
+ default: break;
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P': {
+ ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
+ if (!CST) return; // Must be an immediate to match.
+ unsigned Value = CST->getZExtValue();
+ switch (Letter) {
+ default: llvm_unreachable("Unknown constraint letter!");
+ case 'I': // "I" is a signed 16-bit constant.
+ if ((short)Value == (int)Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
+ case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
+ if ((short)Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
+ if ((Value >> 16) == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'M': // "M" is a constant that is greater than 31.
+ if (Value > 31)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'N': // "N" is a positive constant that is an exact power of two.
+ if ((int)Value > 0 && isPowerOf2_32(Value))
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'O': // "O" is the constant zero.
+ if (Value == 0)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
+ if ((short)-Value == (int)-Value)
+ Result = DAG.getTargetConstant(Value, Op.getValueType());
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ // Handle standard constraint letters.
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+// isLegalAddressingMode - Return true if the addressing mode represented
+// by AM is legal for this target, for a load/store of the specified type.
+bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // FIXME: PPC does not allow r+i addressing modes for vectors!
+
+ // PPC allows a sign-extended 16-bit immediate field.
+ if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+ return false;
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // PPC only support r+r,
+ switch (AM.Scale) {
+ case 0: // "r+i" or just "i", depending on HasBaseReg.
+ break;
+ case 1:
+ if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
+ return false;
+ // Otherwise we have r+r or r+i.
+ break;
+ case 2:
+ if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
+ return false;
+ // Allow 2*r as r+r.
+ break;
+ default:
+ // No other scales are supported.
+ return false;
+ }
+
+ return true;
+}
+
+SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ if (verifyReturnAddressArgumentIsConstant(Op, DAG))
+ return SDValue();
+
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ // Make sure the function does not optimize away the store of the RA to
+ // the stack.
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setLRStoreRequired();
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+
+ DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
+ isPPC64? MVT::i64 : MVT::i32);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Just load the return address off the stack.
+ SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, MachinePointerInfo(), false, false, false, 0);
+}
+
+SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = PtrVT == MVT::i64;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ // Naked functions never have a frame pointer, and so we use r1. For all
+ // other functions, this decision must be delayed until during PEI.
+ unsigned FrameReg;
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
+ else
+ FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
+
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
+ PtrVT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
+ FrameAddr, MachinePointerInfo(), false, false,
+ false, 0);
+ return FrameAddr;
+}
+
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ bool isPPC64 = Subtarget.isPPC64();
+ bool isDarwinABI = Subtarget.isDarwinABI();
+
+ if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
+ (!isPPC64 && VT != MVT::i32))
+ report_fatal_error("Invalid register global variable type");
+
+ bool is64Bit = isPPC64 && VT == MVT::i64;
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
+ .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+ .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
+ (is64Bit ? PPC::X13 : PPC::R13))
+ .Default(0);
+
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
+bool
+PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The PowerPC target isn't yet aware of offsets.
+ return false;
+}
+
+/// getOptimalMemOpType - Returns the target specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove
+/// lowering. If DstAlign is zero that means it's safe to destination
+/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+/// means there isn't a need to check it against alignment requirement,
+/// probably because the source does not need to be loaded. If 'IsMemset' is
+/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+/// source is constant so it does not need to be loaded.
+/// It returns EVT::Other if the type should be determined using generic
+/// target-independent logic.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ if (Subtarget.isPPC64()) {
+ return MVT::i64;
+ } else {
+ return MVT::i32;
+ }
+}
+
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 64)
+ return false;
+ return true;
+}
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned,
+ bool *Fast) const {
+ if (DisablePPCUnaligned)
+ return false;
+
+ // PowerPC supports unaligned memory access for simple non-vector types.
+ // Although accessing unaligned addresses is not as efficient as accessing
+ // aligned addresses, it is generally more efficient than manual expansion,
+ // and generally only traps for software emulation when crossing page
+ // boundaries.
+
+ if (!VT.isSimple())
+ return false;
+
+ if (VT.getSimpleVT().isVector()) {
+ if (Subtarget.hasVSX()) {
+ if (VT != MVT::v2f64 && VT != MVT::v2i64)
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ if (VT == MVT::ppcf128)
+ return false;
+
+ if (Fast)
+ *Fast = true;
+
+ return true;
+}
+
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool
+PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
+ EVT VT , unsigned DefinedValues) const {
+ if (VT == MVT::v2i64)
+ return false;
+
+ return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
+}
+
+Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
+ if (DisableILPPref || Subtarget.enableMachineScheduler())
+ return TargetLowering::getSchedulingPreference(N);
+
+ return Sched::ILP;
+}
+
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const {
+ return PPC::createFastISel(FuncInfo, LibInfo);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
new file mode 100644
index 000000000000..74d3c651538e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -0,0 +1,717 @@
+//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that PPC uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+#define LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
+
+#include "PPC.h"
+#include "PPCInstrInfo.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+ namespace PPCISD {
+ enum NodeType {
+ // Start the numbering where the builtin ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ /// FSEL - Traditional three-operand fsel node.
+ ///
+ FSEL,
+
+ /// FCFID - The FCFID instruction, taking an f64 operand and producing
+ /// and f64 value containing the FP representation of the integer that
+ /// was temporarily in the f64 operand.
+ FCFID,
+
+ /// Newer FCFID[US] integer-to-floating-point conversion instructions for
+ /// unsigned integers and single-precision outputs.
+ FCFIDU, FCFIDS, FCFIDUS,
+
+ /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
+ /// operand, producing an f64 value containing the integer representation
+ /// of that FP value.
+ FCTIDZ, FCTIWZ,
+
+ /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
+ /// unsigned integers.
+ FCTIDUZ, FCTIWUZ,
+
+ /// Reciprocal estimate instructions (unary FP ops).
+ FRE, FRSQRTE,
+
+ // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking
+ // three v4f32 operands and producing a v4f32 result.
+ VMADDFP, VNMSUBFP,
+
+ /// VPERM - The PPC VPERM Instruction.
+ ///
+ VPERM,
+
+ /// Hi/Lo - These represent the high and low 16-bit parts of a global
+ /// address respectively. These nodes have two operands, the first of
+ /// which must be a TargetGlobalAddress, and the second of which must be a
+ /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C',
+ /// though these are usually folded into other nodes.
+ Hi, Lo,
+
+ TOC_ENTRY,
+
+ /// The following two target-specific nodes are used for calls through
+ /// function pointers in the 64-bit SVR4 ABI.
+
+ /// Like a regular LOAD but additionally taking/producing a flag.
+ LOAD,
+
+ /// Like LOAD (taking/producing a flag), but using r2 as hard-coded
+ /// destination.
+ LOAD_TOC,
+
+ /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
+ /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
+ /// compute an allocation on the stack.
+ DYNALLOC,
+
+ /// GlobalBaseReg - On Darwin, this node represents the result of the mflr
+ /// at function entry, used for PIC code.
+ GlobalBaseReg,
+
+ /// These nodes represent the 32-bit PPC shifts that operate on 6-bit
+ /// shift amounts. These nodes are generated by the multi-precision shift
+ /// code.
+ SRL, SRA, SHL,
+
+ /// CALL - A direct function call.
+ /// CALL_NOP is a call with the special NOP which follows 64-bit
+ /// SVR4 calls.
+ CALL, CALL_NOP,
+
+ /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
+ /// MTCTR instruction.
+ MTCTR,
+
+ /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
+ /// BCTRL instruction.
+ BCTRL,
+
+ /// Return with a flag operand, matched by 'blr'
+ RET_FLAG,
+
+ /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFOCRF,
+
+ // FIXME: Remove these once the ANDI glue bug is fixed:
+ /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
+ /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
+ /// implement truncation of i32 or i64 to i1.
+ ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT,
+
+ // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
+ EH_SJLJ_SETJMP,
+
+ // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
+ EH_SJLJ_LONGJMP,
+
+ /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
+ /// instructions. For lack of better number, we use the opcode number
+ /// encoding for the OPC field to identify the compare. For example, 838
+ /// is VCMPGTSH.
+ VCMP,
+
+ /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the
+ /// altivec VCMP*o instructions. For lack of better number, we use the
+ /// opcode number encoding for the OPC field to identify the compare. For
+ /// example, 838 is VCMPGTSH.
+ VCMPo,
+
+ /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
+ /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the
+ /// condition register to branch on, OPC is the branch opcode to use (e.g.
+ /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
+ /// an optional input flag argument.
+ COND_BRANCH,
+
+ /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+ /// loops.
+ BDNZ, BDZ,
+
+ /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
+ /// towards zero. Used only as part of the long double-to-int
+ /// conversion sequence.
+ FADDRTZ,
+
+ /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
+ MFFS,
+
+ /// LARX = This corresponds to PPC l{w|d}arx instrcution: load and
+ /// reserve indexed. This is used to implement atomic operations.
+ LARX,
+
+ /// STCX = This corresponds to PPC stcx. instrcution: store conditional
+ /// indexed. This is used to implement atomic operations.
+ STCX,
+
+ /// TC_RETURN - A tail call return.
+ /// operand #0 chain
+ /// operand #1 callee (register or absolute)
+ /// operand #2 stack adjustment
+ /// operand #3 optional in flag
+ TC_RETURN,
+
+ /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
+ CR6SET,
+ CR6UNSET,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
+ /// on PPC32.
+ PPC32_GOT,
+
+ /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
+ /// local dynamic TLS on PPC32.
+ PPC32_PICGOT,
+
+ /// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
+ /// TLS model, produces an ADDIS8 instruction that adds the GOT
+ /// base to sym\@got\@tprel\@ha.
+ ADDIS_GOT_TPREL_HA,
+
+ /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
+ /// TLS model, produces a LD instruction with base register G8RReg
+ /// and offset sym\@got\@tprel\@l. This completes the addition that
+ /// finds the offset of "sym" relative to the thread pointer.
+ LD_GOT_TPREL_L,
+
+ /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
+ /// model, produces an ADD instruction that adds the contents of
+ /// G8RReg to the thread pointer. Symbol contains a relocation
+ /// sym\@tls which is to be replaced by the thread pointer and
+ /// identifies to the linker that the instruction is part of a
+ /// TLS sequence.
+ ADD_TLS,
+
+ /// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsgd\@ha.
+ ADDIS_TLSGD_HA,
+
+ /// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsgd\@l.
+ ADDI_TLSGD_L,
+
+ /// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd).
+ GET_TLS_ADDR,
+
+ /// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDIS8 instruction that adds the GOT base
+ /// register to sym\@got\@tlsld\@ha.
+ ADDIS_TLSLD_HA,
+
+ /// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@tlsld\@l.
+ ADDI_TLSLD_L,
+
+ /// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
+ /// model, produces a call to __tls_get_addr(sym\@tlsld).
+ GET_TLSLD_ADDR,
+
+ /// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
+ /// local-dynamic TLS model, produces an ADDIS8 instruction
+ /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
+ /// to tie this in place following a copy to %X3 from the result
+ /// of a GET_TLSLD_ADDR.
+ ADDIS_DTPREL_HA,
+
+ /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
+ /// model, produces an ADDI8 instruction that adds G8RReg to
+ /// sym\@got\@dtprel\@l.
+ ADDI_DTPREL_L,
+
+ /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
+ /// during instruction selection to optimize a BUILD_VECTOR into
+ /// operations on splats. This is necessary to avoid losing these
+ /// optimizations due to constant folding.
+ VADD_SPLAT,
+
+ /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
+ /// operand identifies the operating system entry point.
+ SC,
+
+ /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
+ /// byte-swapping store instruction. It byte-swaps the low "Type" bits of
+ /// the GPRC input, then stores it through Ptr. Type can be either i16 or
+ /// i32.
+ STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE,
+
+ /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
+ /// byte-swapping load instruction. It loads "Type" bits, byte swaps it,
+ /// then puts it in the bottom bits of the GPRC. TYPE can be either i16
+ /// or i32.
+ LBRX,
+
+ /// STFIWX - The STFIWX instruction. The first operand is an input token
+ /// chain, then an f64 value to store, then an address to store it to.
+ STFIWX,
+
+ /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
+ /// load which sign-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWAX,
+
+ /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
+ /// load which zero-extends from a 32-bit integer value into the
+ /// destination 64-bit register.
+ LFIWZX,
+
+ /// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
+ /// produces an ADDIS8 instruction that adds the TOC base register to
+ /// sym\@toc\@ha.
+ ADDIS_TOC_HA,
+
+ /// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
+ /// produces a LD instruction with base register G8RReg and offset
+ /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ LD_TOC_L,
+
+ /// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
+ /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
+ /// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ ADDI_TOC_L
+ };
+ }
+
+ /// Define some predicates that are used for node matching.
+ namespace PPC {
+ /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUHUM instruction.
+ bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
+ /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
+ /// VPKUWUM instruction.
+ bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
+ /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG);
+
+ /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
+ /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
+ bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
+ unsigned ShuffleKind, SelectionDAG &DAG);
+
+ /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
+ /// shift amount, otherwise return -1.
+ int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
+ SelectionDAG &DAG);
+
+ /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
+ /// specifies a splat of a single element that is suitable for input to
+ /// VSPLTB/VSPLTH/VSPLTW.
+ bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);
+
+ /// isAllNegativeZeroVector - Returns true if all elements of build_vector
+ /// are -0.0.
+ bool isAllNegativeZeroVector(SDNode *N);
+
+ /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
+ /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
+ unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG);
+
+ /// get_VSPLTI_elt - If this is a build_vector of constants which can be
+ /// formed by using a vspltis[bhw] instruction of the specified element
+ /// size, return the constant being splatted. The ByteSize field indicates
+ /// the number of bytes of each element [124] -> [bhw].
+ SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ }
+
+ class PPCSubtarget;
+ class PPCTargetLowering : public TargetLowering {
+ const PPCSubtarget &Subtarget;
+
+ public:
+ explicit PPCTargetLowering(PPCTargetMachine &TM);
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ const char *getTargetNodeName(unsigned Opcode) const override;
+
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+
+ /// getPreIndexedAddressParts - returns true by value, base pointer and
+ /// offset pointer and addressing mode by reference if the node's address
+ /// can be legally represented as pre-indexed load / store address.
+ bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
+ /// SelectAddressRegReg - Given the specified addressed, check to see if it
+ /// can be represented as an indexed [r+r] operation. Returns false if it
+ /// can be more efficiently represented with [r+imm].
+ bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ /// SelectAddressRegImm - Returns true if the address N can be represented
+ /// by a base register plus a signed 16-bit displacement [r+imm], and if it
+ /// is not better represented as reg+reg. If Aligned is true, only accept
+ /// displacements suitable for STD and friends, i.e. multiples of 4.
+ bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
+ SelectionDAG &DAG, bool Aligned) const;
+
+ /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
+ /// represented as an indexed [r+r] operation.
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
+ SelectionDAG &DAG) const;
+
+ Sched::Preference getSchedulingPreference(SDNode *N) const override;
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ ///
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+
+ /// ReplaceNodeResults - Replace the results of node with an illegal result
+ /// type with new values built out of custom code.
+ ///
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const override;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const override;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB, bool is64Bit,
+ unsigned BinOpcode) const;
+ MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ bool is8bit, unsigned Opcode) const;
+
+ MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
+ MachineBasicBlock *MBB) const;
+
+ ConstraintType
+ getConstraintType(const std::string &Constraint) const override;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const override;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const override;
+
+ /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+ /// function arguments in the caller parameter area. This is the actual
+ /// alignment, not its logarithm.
+ unsigned getByValTypeAlignment(Type *Ty) const override;
+
+ /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+ /// vector. If it is invalid, don't add anything to Ops.
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
+
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+
+ /// isLegalICmpImmediate - Return true if the specified immediate is legal
+ /// icmp immediate, that is the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+
+ /// isLegalAddImmediate - Return true if the specified immediate is legal
+ /// add immediate, that is the target has add instructions which can
+ /// add a register and the immediate without having to materialize
+ /// the immediate into a register.
+ bool isLegalAddImmediate(int64_t Imm) const override;
+
+ /// isTruncateFree - Return true if it's free to truncate a value of
+ /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in
+ /// register X1 to i32 by referencing its sub-register R1.
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
+
+ /// \brief Returns true if it is beneficial to convert a load of a constant
+ /// to just the constant itself.
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
+
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+
+ /// getOptimalMemOpType - Returns the target specific optimal type for load
+ /// and store operations as a result of memset, memcpy, and memmove
+ /// lowering. If DstAlign is zero that means it's safe to destination
+ /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
+ /// means there isn't a need to check it against alignment requirement,
+ /// probably because the source does not need to be loaded. If 'IsMemset' is
+ /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
+ /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
+ /// source is constant so it does not need to be loaded.
+ /// It returns EVT::Other if the type should be determined using generic
+ /// target-independent logic.
+ EVT
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
+
+ /// Is unaligned memory access allowed for the given type, and is it fast
+ /// relative to software emulation.
+ bool allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ bool *Fast = nullptr) const override;
+
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+
+ // Should we expand the build vector with shuffles?
+ bool
+ shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const override;
+
+ /// createFastISel - This method returns a target-specific FastISel object,
+ /// or null if the target does not support "fast" instruction selection.
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const override;
+
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override {
+ // We support any array type as "consecutive" block in the parameter
+ // save area. The element type defines the alignment requirement and
+ // whether the argument should go in GPRs, FPRs, or VRs if available.
+ //
+ // Note that clang uses this capability both to implement the ELFv2
+ // homogeneous float/vector aggregate ABI, and to avoid having to use
+ // "byval" when passing aggregates that might fully fit in registers.
+ return Ty->isArrayTy();
+ }
+
+ private:
+ SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
+ SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
+
+ bool
+ IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
+
+ SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
+ int SPDiff,
+ SDValue Chain,
+ SDValue &LROpOut,
+ SDValue &FPOpOut,
+ bool isDarwinABI,
+ SDLoc dl) const;
+
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
+ bool isVarArg,
+ SelectionDAG &DAG,
+ SmallVector<std::pair<unsigned, SDValue>, 8>
+ &RegsToPass,
+ SDValue InFlag, SDValue Chain,
+ SDValue &Callee,
+ int SPDiff, unsigned NumBytes,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ SDValue
+ LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ bool
+ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const override;
+
+ SDValue
+ extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
+ SDValue ArgVal, SDLoc dl) const;
+
+ SDValue
+ LowerFormalArguments_Darwin(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_64SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerFormalArguments_32SVR4(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue
+ createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
+ SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
+ SelectionDAG &DAG, SDLoc dl) const;
+
+ SDValue
+ LowerCall_Darwin(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_64SVR4(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+ SDValue
+ LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
+ SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+
+ CCAssignFn *useFastISelCCs(unsigned Flag) const;
+ };
+
+ namespace PPC {
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo);
+ }
+
+ bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+}
+
+#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
new file mode 100644
index 000000000000..9ed384f56244
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -0,0 +1,1137 @@
+//===-- PPCInstr64Bit.td - The PowerPC 64-bit Support ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC 64-bit instructions. These patterns are used
+// both when in ppc64 mode and when in "use 64-bit extensions in 32-bit" mode.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands.
+//
+def s16imm64 : Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+def u16imm64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
+}
+def s17imm64 : Operand<i64> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+def tocentry : Operand<iPTR> {
+ let MIOperandInfo = (ops i64imm:$imm);
+}
+def tlsreg : Operand<i64> {
+ let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
+}
+def tlsgd : Operand<i64> {}
+def tlscall : Operand<i64> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit transformation functions.
+//
+
+def SHL64 : SDNodeXForm<imm, [{
+ // Transformation function: 63 - imm
+ return getI32Imm(63 - N->getZExtValue());
+}]>;
+
+def SRL64 : SDNodeXForm<imm, [{
+ // Transformation function: 64 - imm
+ return N->getZExtValue() ? getI32Imm(64 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def HI32_48 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 32));
+}]>;
+
+def HI48_64 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned short)(N->getZExtValue() >> 48));
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Calls.
+//
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
+ def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+ def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+ def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ }
+}
+
+let Defs = [LR8] in
+ def MovePCtoLR8 : Pseudo<(outs), (ins), "#MovePCtoLR8", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let Defs = [CTR8], Uses = [CTR8] in {
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
+ }
+
+ let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
+ def BDZLR8 : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", IIC_BrB, []>;
+ def BDNZLR8 : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", IIC_BrB, []>;
+ }
+}
+
+
+
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+
+ def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
+
+ def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>;
+ }
+ let Uses = [RM], isCodeGenOnly = 1 in {
+ def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins calltarget:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+
+ def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins tlscall:$func),
+ "bl $func\n\tnop", IIC_BrB, []>;
+
+ def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
+ (outs), (ins abscalltarget:$func),
+ "bla $func\n\tnop", IIC_BrB,
+ [(PPCcall_nop (i64 imm:$func))]>;
+ }
+ let Uses = [CTR8, RM] in {
+ def BCTRL8 : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In64BitMode]>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+ def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>,
+ Requires<[In64BitMode]>;
+ }
+ }
+}
+} // Interpretation64Bit
+
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let Interpretation64Bit = 1, isAsmParserOnly = 1 in
+let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in
+def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func),
+ "bl $func", IIC_BrB, []>;
+
+// Calls
+def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
+ (BL8 tglobaladdr:$dst)>;
+def : Pat<(PPCcall_nop (i64 tglobaladdr:$dst)),
+ (BL8_NOP tglobaladdr:$dst)>;
+
+def : Pat<(PPCcall (i64 texternalsym:$dst)),
+ (BL8 texternalsym:$dst)>;
+def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
+ (BL8_NOP texternalsym:$dst)>;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_ADD_I64",
+ [(set i64:$dst, (atomic_load_add_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_SUB_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_SUB_I64",
+ [(set i64:$dst, (atomic_load_sub_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_OR_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_OR_I64",
+ [(set i64:$dst, (atomic_load_or_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_XOR_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_XOR_I64",
+ [(set i64:$dst, (atomic_load_xor_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_AND_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_AND_i64",
+ [(set i64:$dst, (atomic_load_and_64 xoaddr:$ptr, i64:$incr))]>;
+ def ATOMIC_LOAD_NAND_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$incr), "#ATOMIC_LOAD_NAND_I64",
+ [(set i64:$dst, (atomic_load_nand_64 xoaddr:$ptr, i64:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$old, g8rc:$new), "#ATOMIC_CMP_SWAP_I64",
+ [(set i64:$dst, (atomic_cmp_swap_64 xoaddr:$ptr, i64:$old, i64:$new))]>;
+
+ def ATOMIC_SWAP_I64 : Pseudo<
+ (outs g8rc:$dst), (ins memrr:$ptr, g8rc:$new), "#ATOMIC_SWAP_I64",
+ [(set i64:$dst, (atomic_swap_64 xoaddr:$ptr, i64:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LDARX : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
+ "ldarx $rD, $ptr", IIC_LdStLDARX,
+ [(set i64:$rD, (PPClarx xoaddr:$ptr))]>;
+
+let Defs = [CR0] in
+def STDCX : XForm_1<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stdcx. $rS, $dst", IIC_LdStSTDCX,
+ [(PPCstcx i64:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi8 :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset),
+ "#TC_RETURNd8 $dst $offset",
+ []>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+ "#TC_RETURNa8 $func $offset",
+ [(PPCtc_return (i64 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri8 : Pseudo<(outs), (ins CTRRC8:$dst, i32imm:$offset),
+ "#TC_RETURNr8 $dst $offset",
+ []>;
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR8, RM] in
+def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>,
+ Requires<[In64BitMode]>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", IIC_BrB,
+ []>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
+ "ba $dst", IIC_BrB,
+ []>;
+} // Interpretation64Bit
+
+def : Pat<(PPCtc_return (i64 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi8 tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i64 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi8 texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
+ (TCRETURNri8 CTRRC8:$dst, imm:$imm)>;
+
+
+// 64-bit CR instructions
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+let neverHasSideEffects = 1 in {
+def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
+ "mfcr $rT", IIC_SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+} // neverHasSideEffects = 1
+
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ let Defs = [CTR8] in
+ def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP64",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In64BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp64 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP64",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In64BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// 64-bit SPR manipulation instrs.
+
+let Uses = [CTR8] in {
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins),
+ "mfctr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let hasSideEffects = 1, Defs = [CTR8] in {
+let Pattern = [(int_ppc_mtctr i64:$rS)] in
+def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Pattern = [(set i64:$rT, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
+ "mfspr $rT, 268", IIC_SprMFTB>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+// Note that encoding mftb using mfspr is now the preferred form,
+// and has been since at least ISA v2.03. The mftb instruction has
+// now been phased out. Using mfspr, however, is known not to work on
+// the POWER3.
+
+let Defs = [X1], Uses = [X1] in
+def DYNALLOC8 : Pseudo<(outs g8rc:$result), (ins g8rc:$negsize, memri:$fpsi),"#DYNALLOC8",
+ [(set i64:$result,
+ (PPCdynalloc i64:$negsize, iaddr:$fpsi))]>;
+
+let Defs = [LR8] in {
+def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
+ "mtlr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR8] in {
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
+ "mflr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+} // Interpretation64Bit
+
+//===----------------------------------------------------------------------===//
+// Fixed point instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Interpretation64Bit = 1 in {
+let neverHasSideEffects = 1 in {
+let isCodeGenOnly = 1 in {
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm),
+ "li $rD, $imm", IIC_IntSimple,
+ [(set i64:$rD, imm64SExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm),
+ "lis $rD, $imm", IIC_IntSimple,
+ [(set i64:$rD, imm16ShiftedSExt:$imm)]>;
+}
+
+// Logical ops.
+let isCommutable = 1 in {
+defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
+defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+} // isCommutable
+defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
+defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
+defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+} // isCommutable
+defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+let isCommutable = 1 in {
+defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
+defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+} // let isCommutable = 1
+
+// Logical ops with immediate.
+let Defs = [CR0] in {
+def ANDIo8 : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo8 : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
+ isDOT;
+}
+def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "ori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
+def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "oris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
+def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
+
+let isCommutable = 1 in
+defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
+ [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
+// initial-exec thread-local storage model.
+def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple,
+ [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
+
+let isCommutable = 1 in
+defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
+ PPC970_DGroup_Cracked;
+
+let Defs = [CARRY] in
+def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>;
+def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
+ "addi $rD, $rA, $imm", IIC_IntSimple,
+ [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
+ "addis $rD, $rA, $imm", IIC_IntSimple,
+ [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
+
+let Defs = [CARRY] in {
+def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>;
+defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
+ PPC970_DGroup_Cracked;
+}
+defm SUBF8 : XOForm_1r<31, 40, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
+defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "neg", "$rT, $rA", IIC_IntSimple,
+ [(set i64:$rT, (ineg i64:$rA))]>;
+let Uses = [CARRY] in {
+let isCommutable = 1 in
+defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
+defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "addme", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, -1))]>;
+defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "addze", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (adde i64:$rA, 0))]>;
+defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
+defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "subfme", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (sube -1, i64:$rA))]>;
+defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
+ "subfze", "$rT, $rA", IIC_IntGeneral,
+ [(set i64:$rT, (sube 0, i64:$rA))]>;
+}
+} // isCodeGenOnly
+
+// FIXME: Duplicating this for the asm parser should be unnecessary, but the
+// previous definition must be marked as CodeGen only to prevent decoding
+// conflicts.
+let isAsmParserOnly = 1 in
+def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple, []>;
+
+let isCommutable = 1 in {
+defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulhd", "$rT, $rA, $rB", IIC_IntMulHW,
+ [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
+defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU,
+ [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+} // isCommutable
+}
+} // Interpretation64Bit
+
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+ "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
+ def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
+ "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
+ def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm),
+ "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64;
+ def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2),
+ "cmpldi $dst, $src1, $src2",
+ IIC_IntCompare>, isPPC64;
+}
+
+let neverHasSideEffects = 1 in {
+defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "sld", "$rA, $rS, $rB", IIC_IntRotateD,
+ [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "srd", "$rA, $rS, $rB", IIC_IntRotateD,
+ [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
+defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
+ "srad", "$rA, $rS, $rB", IIC_IntRotateD,
+ [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsb", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i8))]>;
+defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsh", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
+} // Interpretation64Bit
+
+// For fast-isel:
+let isCodeGenOnly = 1 in {
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64;
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64;
+} // isCodeGenOnly for fast-isel
+
+defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
+ "extsw", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsw", "$rA, $rS", IIC_IntSimple,
+ [(set i64:$rA, (sext i32:$rS))]>, isPPC64;
+
+defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
+ "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
+ [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
+ "cntlzd", "$rA, $rS", IIC_IntGeneral,
+ [(set i64:$rA, (ctlz i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntd $rA, $rS", IIC_IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
+
+// popcntw also does a population count on the high 32 bits (storing the
+// results in the high 32-bits of the output). We'll ignore that here (which is
+// safe because we never separately use the high part of the 64-bit registers).
+def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
+ "popcntw $rA, $rS", IIC_IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
+
+defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divd", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "divdu", "$rT, $rA, $rB", IIC_IntDivD,
+ [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+let isCommutable = 1 in
+defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
+ "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
+ [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
+ [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
+}
+
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
+ (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64, RegConstraint<"$rSi = $rA">,
+ NoEncode<"$rSi">;
+}
+
+// Rotate instructions.
+defm RLDCL : MDSForm_1r<30, 8,
+ (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
+ "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
+ []>, isPPC64;
+defm RLDCR : MDSForm_1r<30, 9,
+ (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
+ "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
+ []>, isPPC64;
+defm RLDICL : MDForm_1r<30, 0,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def RLDICL_32_64 : MDForm_1<30, 0,
+ (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+// End fast-isel.
+defm RLDICR : MDForm_1r<30, 1,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+defm RLDIC : MDForm_1r<30, 2,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64;
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
+ (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ []>;
+
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
+ (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+}
+
+let isSelect = 1 in
+def ISEL8 : AForm_4<31, 15,
+ (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
+ "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+ []>;
+} // Interpretation64Bit
+} // neverHasSideEffects = 1
+} // End FXU Operations.
+
+
+//===----------------------------------------------------------------------===//
+// Load/Store instructions.
+//
+
+
+// Sign extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
+ "lha $rD, $src", IIC_LdStLHA,
+ [(set i64:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
+ "lwa $rD, $src", IIC_LdStLWA,
+ [(set i64:$rD,
+ (aligned4sextloadi32 ixaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHAX8: XForm_1<31, 343, (outs g8rc:$rD), (ins memrr:$src),
+ "lhax $rD, $src", IIC_LdStLHA,
+ [(set i64:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
+ "lwax $rD, $src", IIC_LdStLHA,
+ [(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
+ PPC970_DGroup_Cracked;
+// For fast-isel:
+let isCodeGenOnly = 1, mayLoad = 1 in {
+def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
+ "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
+ "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+} // end fast-isel isCodeGenOnly
+
+// Update forms.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memri:$addr),
+ "lhau $rD, $addr", IIC_LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+// NO LWAU!
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+def LHAUX8 : XForm_1<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWAUX : XForm_1<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwaux $rD, $addr", IIC_LdStLHAUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
+}
+}
+
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+// Zero extending loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
+ "lbz $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi8 iaddr:$src))]>;
+def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
+ "lhz $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
+ "lwz $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi32 iaddr:$src))]>, isPPC64;
+
+def LBZX8 : XForm_1<31, 87, (outs g8rc:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi8 xaddr:$src))]>;
+def LHZX8 : XForm_1<31, 279, (outs g8rc:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX8 : XForm_1<31, 23, (outs g8rc:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (zextloadi32 xaddr:$src))]>;
+
+
+// Update forms.
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LBZUX8 : XForm_1<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LHZUX8 : XForm_1<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+def LWZUX8 : XForm_1<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+} // Interpretation64Bit
+
+
+// Full 8-byte loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
+ "ld $rD, $src", IIC_LdStLD,
+ [(set i64:$rD, (aligned4load ixaddr:$src))]>, isPPC64;
+// The following three definitions are selected for small code model only.
+// Otherwise, we need to create two instructions to form a 32-bit offset,
+// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
+def LDtoc: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtoc",
+ [(set i64:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def LDtocJTI: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtocJTI",
+ [(set i64:$rD,
+ (PPCtoc_entry tjumptable:$disp, i64:$reg))]>, isPPC64;
+def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
+ "#LDtocCPT",
+ [(set i64:$rD,
+ (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64;
+
+let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
+def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
+ "ld 2, $src", IIC_LdStLD,
+ [(PPCload_toc ixaddr:$src)]>, isPPC64;
+
+def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
+ "ldx $rD, $src", IIC_LdStLD,
+ [(set i64:$rD, (load xaddr:$src))]>, isPPC64;
+def LDBRX : XForm_1<31, 532, (outs g8rc:$rD), (ins memrr:$src),
+ "ldbrx $rD, $src", IIC_LdStLoad,
+ [(set i64:$rD, (PPClbrx xoaddr:$src, i64))]>, isPPC64;
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result), (ins memrix:$addr),
+ "ldu $rD, $addr", IIC_LdStLDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
+ NoEncode<"$ea_result">;
+
+def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "ldux $rD, $addr", IIC_LdStLDUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">, isPPC64;
+}
+}
+
+def : Pat<(PPCload ixaddr:$src),
+ (LD ixaddr:$src)>;
+def : Pat<(PPCload xaddr:$src),
+ (LDX xaddr:$src)>;
+
+// Support for medium and large code model.
+def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDIStocHA",
+ [(set i64:$rD,
+ (PPCaddisTocHA i64:$reg, tglobaladdr:$disp))]>,
+ isPPC64;
+def LDtocL: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc_nox0:$reg),
+ "#LDtocL",
+ [(set i64:$rD,
+ (PPCldTocL tglobaladdr:$disp, i64:$reg))]>, isPPC64;
+def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
+ "#ADDItocL",
+ [(set i64:$rD,
+ (PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
+
+// Support for thread-local storage.
+def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDISgotTprelHA",
+ [(set i64:$rD,
+ (PPCaddisGotTprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
+ "#LDgotTprelL",
+ [(set i64:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
+ isPPC64;
+def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
+ (ADD8TLS $in, tglobaltlsaddr:$g)>;
+def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDIStlsgdHA",
+ [(set i64:$rD,
+ (PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDItlsgdL",
+ [(set i64:$rD,
+ (PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsADDR",
+ [(set i64:$rD,
+ (PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDIStlsldHA",
+ [(set i64:$rD,
+ (PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDItlsldL",
+ [(set i64:$rD,
+ (PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
+ "#GETtlsldADDR",
+ [(set i64:$rD,
+ (PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
+ isPPC64;
+def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDISdtprelHA",
+ [(set i64:$rD,
+ (PPCaddisDtprelHA i64:$reg,
+ tglobaltlsaddr:$disp))]>,
+ isPPC64;
+def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
+ "#ADDIdtprelL",
+ [(set i64:$rD,
+ (PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
+ isPPC64;
+
+let PPC970_Unit = 2 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+// Truncating stores.
+def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
+ "stb $rS, $src", IIC_LdStStore,
+ [(truncstorei8 i64:$rS, iaddr:$src)]>;
+def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
+ "sth $rS, $src", IIC_LdStStore,
+ [(truncstorei16 i64:$rS, iaddr:$src)]>;
+def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
+ "stw $rS, $src", IIC_LdStStore,
+ [(truncstorei32 i64:$rS, iaddr:$src)]>;
+def STBX8 : XForm_8<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stbx $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX8 : XForm_8<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
+ "sthx $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX8 : XForm_8<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stwx $rS, $dst", IIC_LdStStore,
+ [(truncstorei32 i64:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+} // Interpretation64Bit
+
+// Normal 8-byte stores.
+def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
+ "std $rS, $dst", IIC_LdStSTD,
+ [(aligned4store i64:$rS, ixaddr:$dst)]>, isPPC64;
+def STDX : XForm_8<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stdx $rS, $dst", IIC_LdStSTD,
+ [(store i64:$rS, xaddr:$dst)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+def STDBRX: XForm_8<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
+ "stdbrx $rS, $dst", IIC_LdStStore,
+ [(PPCstbrx i64:$rS, xoaddr:$dst, i64)]>, isPPC64,
+ PPC970_DGroup_Cracked;
+}
+
+// Stores with Update (pre-inc).
+let PPC970_Unit = 2, mayStore = 1 in {
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+
+def STBUX8: XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX8: XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX8: XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+} // Interpretation64Bit
+
+def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrix:$dst),
+ "stdu $rS, $dst", IIC_LdStSTDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ isPPC64;
+
+def STDUX : XForm_8<31, 181, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memrr:$dst),
+ "stdux $rS, $dst", IIC_LdStSTDUX, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked, isPPC64;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU8 $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(aligned4pre_store i64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+def : Pat<(pre_truncsti8 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti32 i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX8 $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STDUX $rS, $ptrreg, $ptroff)>;
+
+
+//===----------------------------------------------------------------------===//
+// Floating point instructions.
+//
+
+
+let PPC970_Unit = 3, neverHasSideEffects = 1,
+ Uses = [RM] in { // FPU Operations.
+defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fcfid", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctid", "$frD, $frB", IIC_FPGeneral,
+ []>, isPPC64;
+defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctidz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
+
+defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fcfidu", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfcfidu f64:$frB))]>, isPPC64;
+defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
+ "fcfids", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfcfids f64:$frB))]>, isPPC64;
+defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
+ "fcfidus", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfcfidus f64:$frB))]>, isPPC64;
+defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiduz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctiduz f64:$frB))]>, isPPC64;
+defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwuz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctiwuz f64:$frB))]>, isPPC64;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Instruction Patterns
+//
+
+// Extensions and truncates to/from 32-bit regs.
+def : Pat<(i64 (zext i32:$in)),
+ (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
+ 0, 32)>;
+def : Pat<(i64 (anyext i32:$in)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32)>;
+def : Pat<(i32 (trunc i64:$in)),
+ (EXTRACT_SUBREG $in, sub_32)>;
+
+// Implement the 'not' operation with the NOR instruction.
+// (we could use the default xori pattern, but nor has lower latency on some
+// cores (such as the A2)).
+def i64not : OutPatFrag<(ops node:$in),
+ (NOR8 $in, $in)>;
+def : Pat<(not i64:$in),
+ (i64not $in)>;
+
+// Extending loads with i64 targets.
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ8 iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX8 xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ8 iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX8 xaddr:$src)>;
+def : Pat<(extloadi32 iaddr:$src),
+ (LWZ8 iaddr:$src)>;
+def : Pat<(extloadi32 xaddr:$src),
+ (LWZX8 xaddr:$src)>;
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 6-bit and 7-bit shift
+// amounts.
+def : Pat<(sra i64:$rS, i32:$rB),
+ (SRAD $rS, $rB)>;
+def : Pat<(srl i64:$rS, i32:$rB),
+ (SRD $rS, $rB)>;
+def : Pat<(shl i64:$rS, i32:$rB),
+ (SLD $rS, $rB)>;
+
+// SHL/SRL
+def : Pat<(shl i64:$in, (i32 imm:$imm)),
+ (RLDICR $in, imm:$imm, (SHL64 imm:$imm))>;
+def : Pat<(srl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, (SRL64 imm:$imm), imm:$imm)>;
+
+// ROTL
+def : Pat<(rotl i64:$in, i32:$sh),
+ (RLDCL $in, $sh, 0)>;
+def : Pat<(rotl i64:$in, (i32 imm:$imm)),
+ (RLDICL $in, imm:$imm, 0)>;
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS8 tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI8 tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in , 0), (LIS8 tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in , 0), (LI8 tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in , 0), (LIS8 tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in , 0), (LI8 tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS8 tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI8 tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i64:$in),
+ (ADDIS8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i64:$in),
+ (ADDI8 $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i64:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS8 $in, tglobaladdr:$g)>;
+def : Pat<(add i64:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS8 $in, tconstpool:$g)>;
+def : Pat<(add i64:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS8 $in, tjumptable:$g)>;
+def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS8 $in, tblockaddress:$g)>;
+
+// Patterns to match r+r indexed loads and stores for
+// addresses without at least 4-byte alignment.
+def : Pat<(i64 (unaligned4sextloadi32 xoaddr:$src)),
+ (LWAX xoaddr:$src)>;
+def : Pat<(i64 (unaligned4load xoaddr:$src)),
+ (LDX xoaddr:$src)>;
+def : Pat<(unaligned4store i64:$rS, xoaddr:$dst),
+ (STDX $rS, xoaddr:$dst)>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
new file mode 100644
index 000000000000..b271b5d5aa21
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -0,0 +1,939 @@
+//===-- PPCInstrAltivec.td - The PowerPC Altivec Extension -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Altivec extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Altivec transformation functions and pattern fragments.
+//
+
+// Since we canonicalize buildvectors to v16i8, all vnots "-1" operands will be
+// of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+
+def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
+}]>;
+def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 0, *CurDAG);
+}]>;
+def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
+}]>;
+def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 1, *CurDAG);
+}]>;
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vpkuhum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+def vpkuwum_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), 2, *CurDAG);
+}]>;
+
+def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
+}]>;
+def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
+}]>;
+def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
+}]>;
+def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 0, *CurDAG);
+}]>;
+def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 0, *CurDAG);
+}]>;
+def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 0, *CurDAG);
+}]>;
+
+
+def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
+}]>;
+def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
+}]>;
+def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 1, *CurDAG);
+}]>;
+def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 1, *CurDAG);
+}]>;
+def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 1, *CurDAG);
+}]>;
+
+
+// These fragments are provided for little-endian, where the inputs must be
+// swapped for correct semantics.
+def vmrglb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrglh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrglw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+def vmrghb_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, 2, *CurDAG);
+}]>;
+def vmrghh_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, 2, *CurDAG);
+}]>;
+def vmrghw_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, 2, *CurDAG);
+}]>;
+
+
+def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 0, *CurDAG));
+}]>;
+def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 0, *CurDAG) != -1;
+}], VSLDOI_get_imm>;
+
+
+/// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into
+/// vector_shuffle(X,undef,mask) by the dag combiner.
+def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 1, *CurDAG));
+}]>;
+def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 1, *CurDAG) != -1;
+}], VSLDOI_unary_get_imm>;
+
+
+/// VSLDOI_swapped* - These fragments are provided for little-endian, where
+/// the inputs must be swapped for correct semantics.
+def VSLDOI_swapped_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::isVSLDOIShuffleMask(N, 2, *CurDAG));
+}]>;
+def vsldoi_swapped_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isVSLDOIShuffleMask(N, 2, *CurDAG) != -1;
+}], VSLDOI_get_imm>;
+
+
+// VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm.
+def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG));
+}]>;
+def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1);
+}], VSPLTB_get_imm>;
+def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG));
+}]>;
+def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2);
+}], VSPLTH_get_imm>;
+def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{
+ return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG));
+}]>;
+def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 4);
+}], VSPLTW_get_imm>;
+
+
+// VSPLTISB_get_imm xform function: convert build_vector to VSPLTISB imm.
+def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
+}]>;
+def vecspltisb : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+}], VSPLTISB_get_imm>;
+
+// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
+def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
+}]>;
+def vecspltish : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+}], VSPLTISH_get_imm>;
+
+// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
+def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
+}]>;
+def vecspltisw : PatLeaf<(build_vector), [{
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+}], VSPLTISW_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Helpers for defining instructions that directly correspond to intrinsics.
+
+// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
+class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
+
+// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+
+// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
+// input types and an output type.
+class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
+ !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
+ [(set OutTy:$vD,
+ (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+
+// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
+class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
+ [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+
+// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
+
+// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
+// input types and an output type.
+class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType In1Ty, ValueType In2Ty>
+ : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+
+// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
+class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
+ : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
+ [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+
+// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
+// inputs doesn't match the type of the output.
+class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
+ ValueType InTy>
+ : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
+ !strconcat(opc, " $vD, $vB"), IIC_VecFP,
+ [(set OutTy:$vD, (IntID InTy:$vB))]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Definitions.
+
+def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
+let Predicates = [HasAltivec] in {
+
+let isCodeGenOnly = 1 in {
+def DSS : DSS_Form<822, (outs),
+ (ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dss $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DSSALL : DSS_Form<822, (outs),
+ (ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
+ "dssall", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DST : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DSTT : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DSTST : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DSTSTT : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+
+def DST64 : DSS_Form<342, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DSTT64 : DSS_Form<342, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DSTST64 : DSS_Form<374, (outs),
+ (ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+def DSTSTT64 : DSS_Form<374, (outs),
+ (ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
+ "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
+}
+
+def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
+ "mfvscr $vD", IIC_LdStStore,
+ [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
+def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
+ "mtvscr $vB", IIC_LdStLoad,
+ [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+
+let canFoldAsLoad = 1, PPC970_Unit = 2 in { // Loads.
+def LVEBX: XForm_1<31, 7, (outs vrrc:$vD), (ins memrr:$src),
+ "lvebx $vD, $src", IIC_LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvebx xoaddr:$src))]>;
+def LVEHX: XForm_1<31, 39, (outs vrrc:$vD), (ins memrr:$src),
+ "lvehx $vD, $src", IIC_LdStLoad,
+ [(set v8i16:$vD, (int_ppc_altivec_lvehx xoaddr:$src))]>;
+def LVEWX: XForm_1<31, 71, (outs vrrc:$vD), (ins memrr:$src),
+ "lvewx $vD, $src", IIC_LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvewx xoaddr:$src))]>;
+def LVX : XForm_1<31, 103, (outs vrrc:$vD), (ins memrr:$src),
+ "lvx $vD, $src", IIC_LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvx xoaddr:$src))]>;
+def LVXL : XForm_1<31, 359, (outs vrrc:$vD), (ins memrr:$src),
+ "lvxl $vD, $src", IIC_LdStLoad,
+ [(set v4i32:$vD, (int_ppc_altivec_lvxl xoaddr:$src))]>;
+}
+
+def LVSL : XForm_1<31, 6, (outs vrrc:$vD), (ins memrr:$src),
+ "lvsl $vD, $src", IIC_LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsl xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+def LVSR : XForm_1<31, 38, (outs vrrc:$vD), (ins memrr:$src),
+ "lvsr $vD, $src", IIC_LdStLoad,
+ [(set v16i8:$vD, (int_ppc_altivec_lvsr xoaddr:$src))]>,
+ PPC970_Unit_LSU;
+
+let PPC970_Unit = 2 in { // Stores.
+def STVEBX: XForm_8<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvebx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvebx v16i8:$rS, xoaddr:$dst)]>;
+def STVEHX: XForm_8<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvehx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvehx v8i16:$rS, xoaddr:$dst)]>;
+def STVEWX: XForm_8<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvewx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvewx v4i32:$rS, xoaddr:$dst)]>;
+def STVX : XForm_8<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvx $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvx v4i32:$rS, xoaddr:$dst)]>;
+def STVXL : XForm_8<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
+ "stvxl $rS, $dst", IIC_LdStStore,
+ [(int_ppc_altivec_stvxl v4i32:$rS, xoaddr:$dst)]>;
+}
+
+let PPC970_Unit = 5 in { // VALU Operations.
+// VA-Form instructions. 3-input AltiVec ops.
+let isCommutable = 1 in {
+def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
+ "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP,
+ [(set v4f32:$vD,
+ (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+
+// FIXME: The fma+fneg pattern won't match because fneg is not legal.
+def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
+ "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
+ [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
+ (fneg v4f32:$vB))))]>;
+
+def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
+def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
+ v8i16>;
+def VMLADDUHM : VA1a_Int_Ty<34, "vmladduhm", int_ppc_altivec_vmladduhm, v8i16>;
+} // isCommutable
+
+def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
+ v4i32, v4i32, v16i8>;
+def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
+
+// Shuffles.
+def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
+ "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
+
+// VX-Form instructions. AltiVec arithmetic ops.
+let isCommutable = 1 in {
+def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddfp $vD, $vA, $vB", IIC_VecFP,
+ [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
+
+def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vaddubm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
+def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vadduhm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
+def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vadduwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
+
+def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
+def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
+def VADDSHS : VX1_Int_Ty<832, "vaddshs", int_ppc_altivec_vaddshs, v8i16>;
+def VADDSWS : VX1_Int_Ty<896, "vaddsws", int_ppc_altivec_vaddsws, v4i32>;
+def VADDUBS : VX1_Int_Ty<512, "vaddubs", int_ppc_altivec_vaddubs, v16i8>;
+def VADDUHS : VX1_Int_Ty<576, "vadduhs", int_ppc_altivec_vadduhs, v8i16>;
+def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
+} // isCommutable
+
+let isCommutable = 1 in
+def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vand $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
+def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vandc $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (and v4i32:$vA,
+ (vnot_ppc v4i32:$vB)))]>;
+
+def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vcfsx $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, imm:$UIMM))]>;
+def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vcfux $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, imm:$UIMM))]>;
+def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vctsxs $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, imm:$UIMM))]>;
+def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vctuxs $vD, $vB, $UIMM", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, imm:$UIMM))]>;
+
+// Defines with the UIM field set to 0 for floating-point
+// to integer (fp_to_sint/fp_to_uint) conversions and integer
+// to floating-point (sint_to_fp/uint_to_fp) conversions.
+let isCodeGenOnly = 1, VA = 0 in {
+def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vcfsx $vD, $vB, 0", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
+def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vctuxs $vD, $vB, 0", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
+def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vcfux $vD, $vB, 0", IIC_VecFP,
+ [(set v4f32:$vD,
+ (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
+def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB),
+ "vctsxs $vD, $vB, 0", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
+}
+def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
+def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
+
+let isCommutable = 1 in {
+def VAVGSB : VX1_Int_Ty<1282, "vavgsb", int_ppc_altivec_vavgsb, v16i8>;
+def VAVGSH : VX1_Int_Ty<1346, "vavgsh", int_ppc_altivec_vavgsh, v8i16>;
+def VAVGSW : VX1_Int_Ty<1410, "vavgsw", int_ppc_altivec_vavgsw, v4i32>;
+def VAVGUB : VX1_Int_Ty<1026, "vavgub", int_ppc_altivec_vavgub, v16i8>;
+def VAVGUH : VX1_Int_Ty<1090, "vavguh", int_ppc_altivec_vavguh, v8i16>;
+def VAVGUW : VX1_Int_Ty<1154, "vavguw", int_ppc_altivec_vavguw, v4i32>;
+
+def VMAXFP : VX1_Int_Ty<1034, "vmaxfp", int_ppc_altivec_vmaxfp, v4f32>;
+def VMAXSB : VX1_Int_Ty< 258, "vmaxsb", int_ppc_altivec_vmaxsb, v16i8>;
+def VMAXSH : VX1_Int_Ty< 322, "vmaxsh", int_ppc_altivec_vmaxsh, v8i16>;
+def VMAXSW : VX1_Int_Ty< 386, "vmaxsw", int_ppc_altivec_vmaxsw, v4i32>;
+def VMAXUB : VX1_Int_Ty< 2, "vmaxub", int_ppc_altivec_vmaxub, v16i8>;
+def VMAXUH : VX1_Int_Ty< 66, "vmaxuh", int_ppc_altivec_vmaxuh, v8i16>;
+def VMAXUW : VX1_Int_Ty< 130, "vmaxuw", int_ppc_altivec_vmaxuw, v4i32>;
+def VMINFP : VX1_Int_Ty<1098, "vminfp", int_ppc_altivec_vminfp, v4f32>;
+def VMINSB : VX1_Int_Ty< 770, "vminsb", int_ppc_altivec_vminsb, v16i8>;
+def VMINSH : VX1_Int_Ty< 834, "vminsh", int_ppc_altivec_vminsh, v8i16>;
+def VMINSW : VX1_Int_Ty< 898, "vminsw", int_ppc_altivec_vminsw, v4i32>;
+def VMINUB : VX1_Int_Ty< 514, "vminub", int_ppc_altivec_vminub, v16i8>;
+def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
+def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
+} // isCommutable
+
+def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrghb $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrghh $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrghw $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrglb $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrglh $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vmrglw $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+
+def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
+ v4i32, v16i8, v4i32>;
+def VMSUMSHM : VA1a_Int_Ty3<40, "vmsumshm", int_ppc_altivec_vmsumshm,
+ v4i32, v8i16, v4i32>;
+def VMSUMSHS : VA1a_Int_Ty3<41, "vmsumshs", int_ppc_altivec_vmsumshs,
+ v4i32, v8i16, v4i32>;
+def VMSUMUBM : VA1a_Int_Ty3<36, "vmsumubm", int_ppc_altivec_vmsumubm,
+ v4i32, v16i8, v4i32>;
+def VMSUMUHM : VA1a_Int_Ty3<38, "vmsumuhm", int_ppc_altivec_vmsumuhm,
+ v4i32, v8i16, v4i32>;
+def VMSUMUHS : VA1a_Int_Ty3<39, "vmsumuhs", int_ppc_altivec_vmsumuhs,
+ v4i32, v8i16, v4i32>;
+
+let isCommutable = 1 in {
+def VMULESB : VX1_Int_Ty2<776, "vmulesb", int_ppc_altivec_vmulesb,
+ v8i16, v16i8>;
+def VMULESH : VX1_Int_Ty2<840, "vmulesh", int_ppc_altivec_vmulesh,
+ v4i32, v8i16>;
+def VMULEUB : VX1_Int_Ty2<520, "vmuleub", int_ppc_altivec_vmuleub,
+ v8i16, v16i8>;
+def VMULEUH : VX1_Int_Ty2<584, "vmuleuh", int_ppc_altivec_vmuleuh,
+ v4i32, v8i16>;
+def VMULOSB : VX1_Int_Ty2<264, "vmulosb", int_ppc_altivec_vmulosb,
+ v8i16, v16i8>;
+def VMULOSH : VX1_Int_Ty2<328, "vmulosh", int_ppc_altivec_vmulosh,
+ v4i32, v8i16>;
+def VMULOUB : VX1_Int_Ty2< 8, "vmuloub", int_ppc_altivec_vmuloub,
+ v8i16, v16i8>;
+def VMULOUH : VX1_Int_Ty2< 72, "vmulouh", int_ppc_altivec_vmulouh,
+ v4i32, v8i16>;
+} // isCommutable
+
+def VREFP : VX2_Int_SP<266, "vrefp", int_ppc_altivec_vrefp>;
+def VRFIM : VX2_Int_SP<714, "vrfim", int_ppc_altivec_vrfim>;
+def VRFIN : VX2_Int_SP<522, "vrfin", int_ppc_altivec_vrfin>;
+def VRFIP : VX2_Int_SP<650, "vrfip", int_ppc_altivec_vrfip>;
+def VRFIZ : VX2_Int_SP<586, "vrfiz", int_ppc_altivec_vrfiz>;
+def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
+
+def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
+
+def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubfp $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
+def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsububm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
+def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubuhm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
+def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
+
+def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
+def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
+def VSUBSWS : VX1_Int_Ty<1920, "vsubsws" , int_ppc_altivec_vsubsws, v4i32>;
+def VSUBUBS : VX1_Int_Ty<1536, "vsububs" , int_ppc_altivec_vsububs, v16i8>;
+def VSUBUHS : VX1_Int_Ty<1600, "vsubuhs" , int_ppc_altivec_vsubuhs, v8i16>;
+def VSUBUWS : VX1_Int_Ty<1664, "vsubuws" , int_ppc_altivec_vsubuws, v4i32>;
+
+def VSUMSWS : VX1_Int_Ty<1928, "vsumsws" , int_ppc_altivec_vsumsws, v4i32>;
+def VSUM2SWS: VX1_Int_Ty<1672, "vsum2sws", int_ppc_altivec_vsum2sws, v4i32>;
+
+def VSUM4SBS: VX1_Int_Ty3<1800, "vsum4sbs", int_ppc_altivec_vsum4sbs,
+ v4i32, v16i8, v4i32>;
+def VSUM4SHS: VX1_Int_Ty3<1608, "vsum4shs", int_ppc_altivec_vsum4shs,
+ v4i32, v8i16, v4i32>;
+def VSUM4UBS: VX1_Int_Ty3<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs,
+ v4i32, v16i8, v4i32>;
+
+def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vnor $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (vnot_ppc (or v4i32:$vA,
+ v4i32:$vB)))]>;
+let isCommutable = 1 in {
+def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vor $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
+def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vxor $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+} // isCommutable
+
+def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
+def VRLH : VX1_Int_Ty< 68, "vrlh", int_ppc_altivec_vrlh, v8i16>;
+def VRLW : VX1_Int_Ty< 132, "vrlw", int_ppc_altivec_vrlw, v4i32>;
+
+def VSL : VX1_Int_Ty< 452, "vsl" , int_ppc_altivec_vsl, v4i32 >;
+def VSLO : VX1_Int_Ty<1036, "vslo", int_ppc_altivec_vslo, v4i32>;
+
+def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
+def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
+def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
+
+def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vspltb $vD, $vB, $UIMM", IIC_VecPerm,
+ [(set v16i8:$vD,
+ (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vsplth $vD, $vB, $UIMM", IIC_VecPerm,
+ [(set v16i8:$vD,
+ (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
+ "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
+ [(set v16i8:$vD,
+ (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+
+def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
+def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
+
+def VSRAB : VX1_Int_Ty< 772, "vsrab", int_ppc_altivec_vsrab, v16i8>;
+def VSRAH : VX1_Int_Ty< 836, "vsrah", int_ppc_altivec_vsrah, v8i16>;
+def VSRAW : VX1_Int_Ty< 900, "vsraw", int_ppc_altivec_vsraw, v4i32>;
+def VSRB : VX1_Int_Ty< 516, "vsrb" , int_ppc_altivec_vsrb , v16i8>;
+def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
+def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
+
+
+def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM),
+ "vspltisb $vD, $SIMM", IIC_VecPerm,
+ [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
+def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM),
+ "vspltish $vD, $SIMM", IIC_VecPerm,
+ [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
+def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
+ "vspltisw $vD, $SIMM", IIC_VecPerm,
+ [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
+
+// Vector Pack.
+def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
+ v8i16, v4i32>;
+def VPKSHSS : VX1_Int_Ty2<398, "vpkshss", int_ppc_altivec_vpkshss,
+ v16i8, v8i16>;
+def VPKSHUS : VX1_Int_Ty2<270, "vpkshus", int_ppc_altivec_vpkshus,
+ v16i8, v8i16>;
+def VPKSWSS : VX1_Int_Ty2<462, "vpkswss", int_ppc_altivec_vpkswss,
+ v16i8, v4i32>;
+def VPKSWUS : VX1_Int_Ty2<334, "vpkswus", int_ppc_altivec_vpkswus,
+ v8i16, v4i32>;
+def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpkuhum $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUS : VX1_Int_Ty2<142, "vpkuhus", int_ppc_altivec_vpkuhus,
+ v16i8, v8i16>;
+def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vpkuwum $vD, $vA, $vB", IIC_VecFP,
+ [(set v16i8:$vD,
+ (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
+ v8i16, v4i32>;
+
+// Vector Unpack.
+def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
+ v4i32, v8i16>;
+def VUPKHSB : VX2_Int_Ty2<526, "vupkhsb", int_ppc_altivec_vupkhsb,
+ v8i16, v16i8>;
+def VUPKHSH : VX2_Int_Ty2<590, "vupkhsh", int_ppc_altivec_vupkhsh,
+ v4i32, v8i16>;
+def VUPKLPX : VX2_Int_Ty2<974, "vupklpx", int_ppc_altivec_vupklpx,
+ v4i32, v8i16>;
+def VUPKLSB : VX2_Int_Ty2<654, "vupklsb", int_ppc_altivec_vupklsb,
+ v8i16, v16i8>;
+def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
+ v4i32, v8i16>;
+
+
+// Altivec Comparisons.
+
+class VCMP<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
+ [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
+class VCMPo<bits<10> xo, string asmstr, ValueType Ty>
+ : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ IIC_VecFPCompare,
+ [(set Ty:$vD, (Ty (PPCvcmp_o Ty:$vA, Ty:$vB, xo)))]> {
+ let Defs = [CR6];
+ let RC = 1;
+}
+
+// f32 element comparisons.0
+def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
+def VCMPBFPo : VCMPo<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
+def VCMPEQFPo : VCMPo<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
+def VCMPGEFPo : VCMPo<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
+def VCMPGTFPo : VCMPo<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+
+// i8 element comparisons.
+def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
+def VCMPEQUBo : VCMPo< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
+def VCMPGTSBo : VCMPo<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
+def VCMPGTUBo : VCMPo<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+
+// i16 element comparisons.
+def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
+def VCMPEQUHo : VCMPo< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
+def VCMPGTSHo : VCMPo<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
+def VCMPGTUHo : VCMPo<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+
+// i32 element comparisons.
+def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
+def VCMPEQUWo : VCMPo<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
+def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
+def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+
+let isCodeGenOnly = 1 in {
+def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", IIC_VecFP,
+ [(set v16i8:$vD, (v16i8 immAllZerosV))]>;
+def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", IIC_VecFP,
+ [(set v8i16:$vD, (v8i16 immAllZerosV))]>;
+def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", IIC_VecFP,
+ [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+
+let IMM=-1 in {
+def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", IIC_VecFP,
+ [(set v16i8:$vD, (v16i8 immAllOnesV))]>;
+def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", IIC_VecFP,
+ [(set v8i16:$vD, (v8i16 immAllOnesV))]>;
+def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", IIC_VecFP,
+ [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
+}
+}
+} // VALU Operations.
+
+//===----------------------------------------------------------------------===//
+// Additional Altivec Patterns
+//
+
+// DS* intrinsics
+def : Pat<(int_ppc_altivec_dssall), (DSSALL 1, 0, 0, 0)>;
+def : Pat<(int_ppc_altivec_dss imm:$STRM), (DSS 0, imm:$STRM, 0, 0)>;
+
+// * 32-bit
+def : Pat<(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM),
+ (DST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTT 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM),
+ (DSTST 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT 1, imm:$STRM, $rA, $rB)>;
+
+// * 64-bit
+def : Pat<(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM),
+ (DST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTT64 1, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dstst i64:$rA, i32:$rB, imm:$STRM),
+ (DSTST64 0, imm:$STRM, $rA, $rB)>;
+def : Pat<(int_ppc_altivec_dststt i64:$rA, i32:$rB, imm:$STRM),
+ (DSTSTT64 1, imm:$STRM, $rA, $rB)>;
+
+// Loads.
+def : Pat<(v4i32 (load xoaddr:$src)), (LVX xoaddr:$src)>;
+
+// Stores.
+def : Pat<(store v4i32:$rS, xoaddr:$dst),
+ (STVX $rS, xoaddr:$dst)>;
+
+// Bit conversions.
+def : Pat<(v16i8 (bitconvert (v8i16 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VRRC:$src))), (v16i8 VRRC:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VRRC:$src))), (v16i8 VRRC:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VRRC:$src))), (v8i16 VRRC:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VRRC:$src))), (v8i16 VRRC:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VRRC:$src))), (v4i32 VRRC:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VRRC:$src))), (v4i32 VRRC:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VRRC:$src))), (v4f32 VRRC:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VRRC:$src))), (v4f32 VRRC:$src)>;
+
+// Shuffles.
+
+// Match vsldoi(x,x), vpkuwum(x,x), vpkuhum(x,x)
+def:Pat<(vsldoi_unary_shuffle:$in v16i8:$vA, undef),
+ (VSLDOI $vA, $vA, (VSLDOI_unary_get_imm $in))>;
+def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
+ (VPKUWUM $vA, $vA)>;
+def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
+ (VPKUHUM $vA, $vA)>;
+
+// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
+// These fragments are matched for little-endian, where the inputs must
+// be swapped for correct semantics.
+def:Pat<(vsldoi_swapped_shuffle:$in v16i8:$vA, v16i8:$vB),
+ (VSLDOI $vB, $vA, (VSLDOI_swapped_get_imm $in))>;
+def:Pat<(vpkuwum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUWUM $vB, $vA)>;
+def:Pat<(vpkuhum_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VPKUHUM $vB, $vA)>;
+
+// Match vmrg*(x,x)
+def:Pat<(vmrglb_unary_shuffle v16i8:$vA, undef),
+ (VMRGLB $vA, $vA)>;
+def:Pat<(vmrglh_unary_shuffle v16i8:$vA, undef),
+ (VMRGLH $vA, $vA)>;
+def:Pat<(vmrglw_unary_shuffle v16i8:$vA, undef),
+ (VMRGLW $vA, $vA)>;
+def:Pat<(vmrghb_unary_shuffle v16i8:$vA, undef),
+ (VMRGHB $vA, $vA)>;
+def:Pat<(vmrghh_unary_shuffle v16i8:$vA, undef),
+ (VMRGHH $vA, $vA)>;
+def:Pat<(vmrghw_unary_shuffle v16i8:$vA, undef),
+ (VMRGHW $vA, $vA)>;
+
+// Match vmrg*(y,x), i.e., swapped operands. These fragments
+// are matched for little-endian, where the inputs must be
+// swapped for correct semantics.
+def:Pat<(vmrglb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLB $vB, $vA)>;
+def:Pat<(vmrglh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLH $vB, $vA)>;
+def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGLW $vB, $vA)>;
+def:Pat<(vmrghb_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHB $vB, $vA)>;
+def:Pat<(vmrghh_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHH $vB, $vA)>;
+def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
+ (VMRGHW $vB, $vA)>;
+
+// Logical Operations
+def : Pat<(vnot_ppc v4i32:$vA), (VNOR $vA, $vA)>;
+
+def : Pat<(vnot_ppc (or v4i32:$A, v4i32:$B)),
+ (VNOR $A, $B)>;
+def : Pat<(and v4i32:$A, (vnot_ppc v4i32:$B)),
+ (VANDC $A, $B)>;
+
+def : Pat<(fmul v4f32:$vA, v4f32:$vB),
+ (VMADDFP $vA, $vB,
+ (v4i32 (VSLW (V_SETALLONES), (V_SETALLONES))))>;
+
+// Fused multiply add and multiply sub for packed float. These are represented
+// separately from the real instructions above, for operations that must have
+// the additional precision, such as Newton-Rhapson (used by divide, sqrt)
+def : Pat<(PPCvmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(PPCvnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(int_ppc_altivec_vmaddfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VMADDFP $A, $B, $C)>;
+def : Pat<(int_ppc_altivec_vnmsubfp v4f32:$A, v4f32:$B, v4f32:$C),
+ (VNMSUBFP $A, $B, $C)>;
+
+def : Pat<(PPCvperm v16i8:$vA, v16i8:$vB, v16i8:$vC),
+ (VPERM $vA, $vB, $vC)>;
+
+def : Pat<(PPCfre v4f32:$A), (VREFP $A)>;
+def : Pat<(PPCfrsqrte v4f32:$A), (VRSQRTEFP $A)>;
+
+// Vector shifts
+def : Pat<(v16i8 (shl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSLB $vA, $vB))>;
+def : Pat<(v8i16 (shl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSLH $vA, $vB))>;
+def : Pat<(v4i32 (shl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSLW $vA, $vB))>;
+
+def : Pat<(v16i8 (srl v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRB $vA, $vB))>;
+def : Pat<(v8i16 (srl v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRH $vA, $vB))>;
+def : Pat<(v4i32 (srl v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRW $vA, $vB))>;
+
+def : Pat<(v16i8 (sra v16i8:$vA, v16i8:$vB)),
+ (v16i8 (VSRAB $vA, $vB))>;
+def : Pat<(v8i16 (sra v8i16:$vA, v8i16:$vB)),
+ (v8i16 (VSRAH $vA, $vB))>;
+def : Pat<(v4i32 (sra v4i32:$vA, v4i32:$vB)),
+ (v4i32 (VSRAW $vA, $vB))>;
+
+// Float to integer and integer to float conversions
+def : Pat<(v4i32 (fp_to_sint v4f32:$vA)),
+ (VCTSXS_0 $vA)>;
+def : Pat<(v4i32 (fp_to_uint v4f32:$vA)),
+ (VCTUXS_0 $vA)>;
+def : Pat<(v4f32 (sint_to_fp v4i32:$vA)),
+ (VCFSX_0 $vA)>;
+def : Pat<(v4f32 (uint_to_fp v4i32:$vA)),
+ (VCFUX_0 $vA)>;
+
+// Floating-point rounding
+def : Pat<(v4f32 (ffloor v4f32:$vA)),
+ (VRFIM $vA)>;
+def : Pat<(v4f32 (fceil v4f32:$vA)),
+ (VRFIP $vA)>;
+def : Pat<(v4f32 (ftrunc v4f32:$vA)),
+ (VRFIZ $vA)>;
+def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
+ (VRFIN $vA)>;
+
+} // end HasAltivec
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
new file mode 100644
index 000000000000..b424d1101416
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- PPCInstrBuilder.h - Aides for building PPC insts --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRBUILDER_H
+#define POWERPC_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. This
+/// reference has base register as the FrameIndex offset until it is resolved.
+/// This allows a constant offset to be specified as well...
+///
+static inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
new file mode 100644
index 000000000000..1e4396cd1017
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -0,0 +1,1300 @@
+//===- PowerPCInstrFormats.td - PowerPC Instruction Formats --*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// PowerPC instruction formats
+
+class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+ let Size = 4;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+
+ // Fields used for relation models.
+ string BaseName = "";
+
+ // For cases where multiple instruction definitions really represent the
+ // same underlying instruction but with one definition for 64-bit arguments
+ // and one for 32-bit arguments, this bit breaks the degeneracy between
+ // the two forms and allows TableGen to generate mapping tables.
+ bit Interpretation64Bit = 0;
+}
+
+class PPC970_DGroup_First { bits<1> PPC970_First = 1; }
+class PPC970_DGroup_Single { bits<1> PPC970_Single = 1; }
+class PPC970_DGroup_Cracked { bits<1> PPC970_Cracked = 1; }
+class PPC970_MicroCode;
+
+class PPC970_Unit_Pseudo { bits<3> PPC970_Unit = 0; }
+class PPC970_Unit_FXU { bits<3> PPC970_Unit = 1; }
+class PPC970_Unit_LSU { bits<3> PPC970_Unit = 2; }
+class PPC970_Unit_FPU { bits<3> PPC970_Unit = 3; }
+class PPC970_Unit_CRU { bits<3> PPC970_Unit = 4; }
+class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
+class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
+class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+
+// Two joined instructions; used to emit two adjacent instructions as one.
+// The itinerary from the first instruction is used for scheduling and
+// classification.
+class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : Instruction {
+ field bits<64> Inst;
+ field bits<64> SoftFail = 0;
+ let Size = 8;
+
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let Namespace = "PPC";
+ let Inst{0-5} = opcode1;
+ let Inst{32-37} = opcode2;
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+
+ bits<1> PPC970_First = 0;
+ bits<1> PPC970_Single = 0;
+ bits<1> PPC970_Cracked = 0;
+ bits<3> PPC970_Unit = 0;
+
+ /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
+ /// these must be reflected there! See comments there for what these are.
+ let TSFlags{0} = PPC970_First;
+ let TSFlags{1} = PPC970_Single;
+ let TSFlags{2} = PPC970_Cracked;
+ let TSFlags{5-3} = PPC970_Unit;
+
+ // Fields used for relation models.
+ string BaseName = "";
+ bit Interpretation64Bit = 0;
+}
+
+// 1.7.1 I-Form
+class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.2 B-Form
+class BForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+ bits<14> BD;
+
+ bits<5> BI;
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+
+ let Inst{6-10} = BIBO{4-0};
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+class BForm_1<bits<6> opcode, bits<5> bo, bit aa, bit lk, dag OOL, dag IOL,
+ string asmstr>
+ : BForm<opcode, aa, lk, OOL, IOL, asmstr> {
+ let BIBO{4-0} = bo;
+ let BIBO{6-5} = 0;
+ let CR = 0;
+}
+
+class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = bi;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+class BForm_3<bits<6> opcode, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-10} = bo;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.3 SC-Form
+class SCForm<bits<6> opcode, bits<1> xo,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<7> LEV;
+
+ let Pattern = pattern;
+
+ let Inst{20-26} = LEV;
+ let Inst{30} = xo;
+}
+
+// 1.7.4 D-Form
+class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = Addr{20-16}; // Base Reg
+ let Inst{16-31} = Addr{15-0}; // Displacement
+}
+
+class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> C;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+
+class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
+
+ // Even though ADDICo does not really have an RC bit, provide
+ // the declaration of one here so that isDOT has something to set.
+ bit RC = 0;
+}
+
+class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<16> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = 0;
+ let Inst{16-31} = B;
+}
+
+class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> B;
+ bits<5> A;
+ bits<16> C;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = A;
+ let Inst{11-15} = B;
+ let Inst{16-31} = C;
+}
+
+class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
+ let A = R;
+ let B = R;
+ let C = 0;
+}
+
+class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<21> Addr;
+
+ let Pattern = pattern;
+ bits<24> LI;
+
+ let Inst{6-29} = LI;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+
+ let Inst{38-42} = A;
+ let Inst{43-47} = Addr{20-16}; // Base Reg
+ let Inst{48-63} = Addr{15-0}; // Displacement
+}
+
+// This is used to emit BL8+NOP.
+class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
+ dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : IForm_and_DForm_1<opcode1, aa, lk, opcode2,
+ OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let Addr = 0;
+}
+
+class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<16> I;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = I;
+}
+
+class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class DForm_6<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_5<opcode, OOL, IOL, asmstr, itin>;
+
+class DForm_6_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : DForm_6<opcode, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+
+// 1.7.5 DS-Form
+class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<19> DS_RA;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = DS_RA{18-14}; // Register #
+ let Inst{16-29} = DS_RA{13-0}; // Displacement.
+ let Inst{30-31} = xo;
+}
+
+
+// 1.7.6 X-Form
+class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RST;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// This is the same as XForm_base_r3xo, but the first two operands are swapped
+// when code is emitted.
+class XForm_base_r3xo_swapped
+ <bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RST;
+ bits<5> B;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+class XForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+}
+
+class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+}
+
+class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_8<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern>;
+
+class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+}
+
+class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
+ let B = 0;
+ let Pattern = pattern;
+}
+
+class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<1> L;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10} = L;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<1> L;
+
+ let Inst{6-10} = RS;
+ let Inst{15} = L;
+ let Inst{21-30} = xo;
+}
+
+class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
+ let L = PPC64;
+}
+
+class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<5> FRB;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ let Pattern = pattern;
+ let Inst{6-10} = 31;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<2> L;
+
+ let Pattern = pattern;
+ let Inst{6-8} = 0;
+ let Inst{9-10} = L;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_24_eieio<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XForm_24_sync<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let L = 0;
+}
+
+class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+}
+
+class XForm_28<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// This is used for MFFS, MTFSB0, MTFSB1. 42 is arbitrary; this series of
+// numbers presumably relates to some document, but I haven't found it.
+class XForm_42<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RST;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let Pattern = pattern;
+ bits<5> FM;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FM;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+}
+
+// XX*-Form (VSX)
+class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = XT{5};
+}
+
+class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX2Form_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-15} = 0;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX2Form_2<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-13} = 0;
+ let Inst{14-15} = D;
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-29} = xo;
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> CR;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = CR;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = 0;
+}
+
+class XX3Form_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<2> D;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = 0;
+ let Inst{22-23} = D;
+ let Inst{24-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX3Form_Rc<bits<6> opcode, bits<7> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21} = RC;
+ let Inst{22-28} = xo;
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<6> XT;
+ bits<6> XA;
+ bits<6> XB;
+ bits<6> XC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = XT{4-0};
+ let Inst{11-15} = XA{4-0};
+ let Inst{16-20} = XB{4-0};
+ let Inst{21-25} = XC{4-0};
+ let Inst{26-27} = xo;
+ let Inst{28} = XC{5};
+ let Inst{29} = XA{5};
+ let Inst{30} = XB{5};
+ let Inst{31} = XT{5};
+}
+
+// DCB_Form - Form X instruction, used for dcb* instructions.
+class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = immfield;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+
+// DSS_Form - Form X instruction, used for altivec dss* instructions.
+class DSS_Form<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<31, OOL, IOL, asmstr, itin> {
+ bits<1> T;
+ bits<2> STRM;
+ bits<5> A;
+ bits<5> B;
+
+ let Pattern = pattern;
+
+ let Inst{6} = T;
+ let Inst{7-8} = 0;
+ let Inst{9-10} = STRM;
+ let Inst{11-15} = A;
+ let Inst{16-20} = B;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.7 XL-Form
+class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+ bits<5> CRA;
+ bits<5> CRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRA;
+ let Inst{16-20} = CRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> CRD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = CRD;
+ let Inst{11-15} = CRD;
+ let Inst{16-20} = CRD;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XLForm_2<bits<6> opcode, bits<10> xo, bit lk, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<2> BH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-18} = 0;
+ let Inst{19-20} = BH;
+ let Inst{21-30} = xo;
+ let Inst{31} = lk;
+}
+
+class XLForm_2_br<bits<6> opcode, bits<10> xo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ bits<7> BIBO; // 2 bits of BI and 5 bits of BO.
+ bits<3> CR;
+
+ let BO = BIBO{4-0};
+ let BI{0-1} = BIBO{5-6};
+ let BI{2-4} = CR{0-2};
+ let BH = 0;
+}
+
+class XLForm_2_br2<bits<6> opcode, bits<10> xo, bits<5> bo, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BH = 0;
+}
+
+class XLForm_2_ext<bits<6> opcode, bits<10> xo, bits<5> bo, bits<5> bi, bit lk,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XLForm_2<opcode, xo, lk, OOL, IOL, asmstr, itin, pattern> {
+ let BO = bo;
+ let BI = bi;
+ let BH = 0;
+}
+
+class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<3> BFA;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-13} = BFA;
+ let Inst{14-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.8 XFX-Form
+class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<10> SPR;
+
+ let Inst{6-10} = RT;
+ let Inst{11} = SPR{4};
+ let Inst{12} = SPR{3};
+ let Inst{13} = SPR{2};
+ let Inst{14} = SPR{1};
+ let Inst{15} = SPR{0};
+ let Inst{16} = SPR{9};
+ let Inst{17} = SPR{8};
+ let Inst{18} = SPR{7};
+ let Inst{19} = SPR{6};
+ let Inst{20} = SPR{5};
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_1_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+class XFXForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+
+ let Inst{6-10} = RT;
+ let Inst{11-20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FXM;
+ bits<5> rS;
+
+ let Inst{6-10} = rS;
+ let Inst{11} = 0;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> ST;
+ bits<8> FXM;
+
+ let Inst{6-10} = ST;
+ let Inst{11} = 1;
+ let Inst{12-19} = FXM;
+ let Inst{20} = 0;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
+
+class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
+ let SPR = spr;
+}
+
+// XFL-Form - MTFSF
+// This is probably 1.7.9, but I don't have the reference that uses this
+// numbering scheme...
+class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag>pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<8> FM;
+ bits<5> rT;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6} = 0;
+ let Inst{7-14} = FM;
+ let Inst{15} = 0;
+ let Inst{16-20} = rT;
+ let Inst{21-30} = xo;
+ let Inst{31} = RC;
+}
+
+// 1.7.10 XS-Form - SRADI.
+class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> A;
+ bits<5> RS;
+ bits<6> SH;
+
+ bit RC = 0; // set by isDOT
+ let Pattern = pattern;
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = A;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+// 1.7.11 XO-Form
+class XOForm_1<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21} = oe;
+ let Inst{22-30} = xo;
+ let Inst{31} = RC;
+}
+
+class XOForm_3<bits<6> opcode, bits<9> xo, bit oe,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XOForm_1<opcode, xo, oe, OOL, IOL, asmstr, itin, pattern> {
+ let RB = 0;
+}
+
+// 1.7.12 A-Form
+class AForm_1<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRC;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = FRT;
+ let Inst{11-15} = FRA;
+ let Inst{16-20} = FRB;
+ let Inst{21-25} = FRC;
+ let Inst{26-30} = xo;
+ let Inst{31} = RC;
+}
+
+class AForm_2<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRC = 0;
+}
+
+class AForm_3<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : AForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let FRB = 0;
+}
+
+class AForm_4<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+ bits<5> COND;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = COND;
+ let Inst{26-30} = xo;
+ let Inst{31} = 0;
+}
+
+// 1.7.13 M-Form
+class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
+}
+
+class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+}
+
+// 1.7.14 MD-Form
+class MDForm_1<bits<6> opcode, bits<3> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<6> SH;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH{4,3,2,1,0};
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-29} = xo;
+ let Inst{30} = SH{5};
+ let Inst{31} = RC;
+}
+
+class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> RB;
+ bits<6> MBE;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isDOT
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-26} = MBE{4,3,2,1,0,5};
+ let Inst{27-30} = xo;
+ let Inst{31} = RC;
+}
+
+
+// E-1 VA-Form
+
+// VAForm_1 - DACB ordering.
+class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VC;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+// VAForm_1a - DABC ordering.
+class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<5> VC;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-25} = VC;
+ let Inst{26-31} = xo;
+}
+
+class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bits<4> SH;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = 0;
+ let Inst{22-25} = SH;
+ let Inst{26-31} = xo;
+}
+
+// E-2 VX-Form
+class VXForm_1<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_setzero<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : VXForm_1<xo, OOL, IOL, asmstr, itin, pattern> {
+ let VA = VD;
+ let VB = VD;
+}
+
+
+class VXForm_2<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+class VXForm_3<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> IMM;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = IMM;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_4 - VX instructions with "VD,0,0" register fields, like mfvscr.
+class VXForm_4<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = xo;
+}
+
+/// VXForm_5 - VX instructions with "0,0,VB" register fields, like mtvscr.
+class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VB;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = 0;
+ let Inst{16-20} = VB;
+ let Inst{21-31} = xo;
+}
+
+// E-4 VXR-Form
+class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : I<4, OOL, IOL, asmstr, itin> {
+ bits<5> VD;
+ bits<5> VA;
+ bits<5> VB;
+ bit RC = 0;
+
+ let Pattern = pattern;
+
+ let Inst{6-10} = VD;
+ let Inst{11-15} = VA;
+ let Inst{16-20} = VB;
+ let Inst{21} = RC;
+ let Inst{22-31} = xo;
+}
+
+//===----------------------------------------------------------------------===//
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : I<0, OOL, IOL, asmstr, NoItinerary> {
+ let isCodeGenOnly = 1;
+ let PPC64 = 0;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
new file mode 100644
index 000000000000..9bac91d7d412
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -0,0 +1,2246 @@
+//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCInstrInfo.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "PPC.h"
+#include "PPCHazardRecognizers.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-instr-info"
+
+#define GET_INSTRMAP_INFO
+#define GET_INSTRINFO_CTOR_DTOR
+#include "PPCGenInstrInfo.inc"
+
+static cl::
+opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
+ cl::desc("Disable analysis for CTR loops"));
+
+static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
+cl::desc("Disable compare instruction optimization"), cl::Hidden);
+
+static cl::opt<bool> DisableVSXFMAMutate("disable-ppc-vsx-fma-mutation",
+cl::desc("Disable VSX FMA instruction mutation"), cl::Hidden);
+
+static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",
+cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),
+cl::Hidden);
+
+// Pin the vtable to this file.
+void PPCInstrInfo::anchor() {}
+
+PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ Subtarget(STI), RI(STI) {}
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *
+PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ static_cast<const PPCSubtarget *>(STI)->getDarwinDirective();
+ if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||
+ Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {
+ const InstrItineraryData *II =
+ &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG);
+ }
+
+ return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
+}
+
+/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer
+/// to use for this target when scheduling the DAG.
+ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ unsigned Directive =
+ DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective();
+
+ if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)
+ return new PPCDispatchGroupSBHazardRecognizer(II, DAG);
+
+ // Most subtargets use a PPC970 recognizer.
+ if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
+ Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
+ assert(DAG->TII && "No InstrInfo?");
+
+ return new PPCHazardRecognizer970(*DAG);
+ }
+
+ return new ScoreboardHazardRecognizer(II, DAG);
+}
+
+
+int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const {
+ int Latency = PPCGenInstrInfo::getOperandLatency(ItinData, DefMI, DefIdx,
+ UseMI, UseIdx);
+
+ const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
+ unsigned Reg = DefMO.getReg();
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ bool IsRegCR;
+ if (TRI->isVirtualRegister(Reg)) {
+ const MachineRegisterInfo *MRI =
+ &DefMI->getParent()->getParent()->getRegInfo();
+ IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||
+ MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);
+ } else {
+ IsRegCR = PPC::CRRCRegClass.contains(Reg) ||
+ PPC::CRBITRCRegClass.contains(Reg);
+ }
+
+ if (UseMI->isBranch() && IsRegCR) {
+ if (Latency < 0)
+ Latency = getInstrLatency(ItinData, DefMI);
+
+ // On some cores, there is an additional delay between writing to a condition
+ // register, and using it from a branch.
+ unsigned Directive = Subtarget.getDarwinDirective();
+ switch (Directive) {
+ default: break;
+ case PPC::DIR_7400:
+ case PPC::DIR_750:
+ case PPC::DIR_970:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR4:
+ case PPC::DIR_PWR5:
+ case PPC::DIR_PWR5X:
+ case PPC::DIR_PWR6:
+ case PPC::DIR_PWR6X:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ Latency += 2;
+ break;
+ }
+ }
+
+ return Latency;
+}
+
+// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.
+bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default: return false;
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64:
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SubIdx = PPC::sub_32;
+ return true;
+ }
+}
+
+unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ // Note: This list must be kept consistent with LoadRegFromStackSlot.
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::LD:
+ case PPC::LWZ:
+ case PPC::LFS:
+ case PPC::LFD:
+ case PPC::RESTORE_CR:
+ case PPC::RESTORE_CRBIT:
+ case PPC::LVX:
+ case PPC::LXVD2X:
+ case PPC::RESTORE_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ // Note: This list must be kept consistent with StoreRegToStackSlot.
+ switch (MI->getOpcode()) {
+ default: break;
+ case PPC::STD:
+ case PPC::STW:
+ case PPC::STFS:
+ case PPC::STFD:
+ case PPC::SPILL_CR:
+ case PPC::SPILL_CRBIT:
+ case PPC::STVX:
+ case PPC::STXVD2X:
+ case PPC::SPILL_VRSAVE:
+ // Check for the operands added by addFrameReference (the immediate is the
+ // offset which defaults to 0).
+ if (MI->getOperand(1).isImm() && !MI->getOperand(1).getImm() &&
+ MI->getOperand(2).isFI()) {
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ return 0;
+}
+
+// commuteInstruction - We can commute rlwimi instructions, but only if the
+// rotate amt is zero. We also have to munge the immediates a bit.
+MachineInstr *
+PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
+ MachineFunction &MF = *MI->getParent()->getParent();
+
+ // Normal instructions can be commuted the obvious way.
+ if (MI->getOpcode() != PPC::RLWIMI &&
+ MI->getOpcode() != PPC::RLWIMIo &&
+ MI->getOpcode() != PPC::RLWIMI8 &&
+ MI->getOpcode() != PPC::RLWIMI8o)
+ return TargetInstrInfo::commuteInstruction(MI, NewMI);
+
+ // Cannot commute if it has a non-zero rotate count.
+ if (MI->getOperand(3).getImm() != 0)
+ return nullptr;
+
+ // If we have a zero rotate count, we have:
+ // M = mask(MB,ME)
+ // Op0 = (Op1 & ~M) | (Op2 & M)
+ // Change this to:
+ // M = mask((ME+1)&31, (MB-1)&31)
+ // Op0 = (Op2 & ~M) | (Op1 & M)
+
+ // Swap op1/op2
+ unsigned Reg0 = MI->getOperand(0).getReg();
+ unsigned Reg1 = MI->getOperand(1).getReg();
+ unsigned Reg2 = MI->getOperand(2).getReg();
+ unsigned SubReg1 = MI->getOperand(1).getSubReg();
+ unsigned SubReg2 = MI->getOperand(2).getSubReg();
+ bool Reg1IsKill = MI->getOperand(1).isKill();
+ bool Reg2IsKill = MI->getOperand(2).isKill();
+ bool ChangeReg0 = false;
+ // If machine instrs are no longer in two-address forms, update
+ // destination register as well.
+ if (Reg0 == Reg1) {
+ // Must be two address instruction!
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
+ "Expecting a two-address instruction!");
+ assert(MI->getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");
+ Reg2IsKill = false;
+ ChangeReg0 = true;
+ }
+
+ // Masks.
+ unsigned MB = MI->getOperand(4).getImm();
+ unsigned ME = MI->getOperand(5).getImm();
+
+ if (NewMI) {
+ // Create a new instruction.
+ unsigned Reg0 = ChangeReg0 ? Reg2 : MI->getOperand(0).getReg();
+ bool Reg0IsDead = MI->getOperand(0).isDead();
+ return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+ .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+ .addReg(Reg2, getKillRegState(Reg2IsKill))
+ .addReg(Reg1, getKillRegState(Reg1IsKill))
+ .addImm((ME+1) & 31)
+ .addImm((MB-1) & 31);
+ }
+
+ if (ChangeReg0) {
+ MI->getOperand(0).setReg(Reg2);
+ MI->getOperand(0).setSubReg(SubReg2);
+ }
+ MI->getOperand(2).setReg(Reg1);
+ MI->getOperand(1).setReg(Reg2);
+ MI->getOperand(2).setSubReg(SubReg1);
+ MI->getOperand(1).setSubReg(SubReg2);
+ MI->getOperand(2).setIsKill(Reg1IsKill);
+ MI->getOperand(1).setIsKill(Reg2IsKill);
+
+ // Swap the mask around.
+ MI->getOperand(4).setImm((ME+1) & 31);
+ MI->getOperand(5).setImm((MB-1) & 31);
+ return MI;
+}
+
+bool PPCInstrInfo::findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ // For VSX A-Type FMA instructions, it is the first two operands that can be
+ // commuted, however, because the non-encoded tied input operand is listed
+ // first, the operands to swap are actually the second and third.
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+
+ SrcOpIdx1 = 2;
+ SrcOpIdx2 = 3;
+ return true;
+}
+
+void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ // This function is used for scheduling, and the nop wanted here is the type
+ // that terminates dispatch groups on the POWER cores.
+ unsigned Directive = Subtarget.getDarwinDirective();
+ unsigned Opcode;
+ switch (Directive) {
+ default: Opcode = PPC::NOP; break;
+ case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;
+ case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;
+ case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */
+ }
+
+ DebugLoc DL;
+ BuildMI(MBB, MI, DL, get(Opcode));
+}
+
+// Branch analysis.
+// Note: If the condition register is set to CTR or CTR8 then this is a
+// BDNZ (imm == 1) or BDZ (imm == 0) branch.
+bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ bool isPPC64 = Subtarget.isPPC64();
+
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (LastInst->getOpcode() == PPC::B) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCC) {
+ if (!LastInst->getOperand(2).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(2).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BC) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BCn) {
+ if (!LastInst->getOperand(1).isMBB())
+ return true;
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDNZ8 ||
+ LastInst->getOpcode() == PPC::BDNZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ } else if (LastInst->getOpcode() == PPC::BDZ8 ||
+ LastInst->getOpcode() == PPC::BDZ) {
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ return false;
+ }
+
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with PPC::B and PPC:BCC, handle it.
+ if (SecondLastInst->getOpcode() == PPC::BCC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(2).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst->getOperand(0));
+ Cond.push_back(SecondLastInst->getOperand(1));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BC &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (SecondLastInst->getOpcode() == PPC::BCn &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(1).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDNZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(1));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
+ SecondLastInst->getOpcode() == PPC::BDZ) &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB() ||
+ !LastInst->getOperand(0).isMBB())
+ return true;
+ if (DisableCTRLoopAnal)
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(0));
+ Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
+ true));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two PPC:Bs, handle it. The second one is not
+ // executed, so remove it.
+ if (SecondLastInst->getOpcode() == PPC::B &&
+ LastInst->getOpcode() == PPC::B) {
+ if (!SecondLastInst->getOperand(0).isMBB())
+ return true;
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (I->getOpcode() != PPC::BCC &&
+ I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&
+ I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&
+ I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "PPC branch conditions have two components!");
+
+ bool isPPC64 = Subtarget.isPPC64();
+
+ // One-way branch.
+ if (!FBB) {
+ if (Cond.empty()) // Unconditional branch
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
+ else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ else // Conditional branch
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ return 1;
+ }
+
+ // Two-way Conditional Branch.
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ BuildMI(&MBB, DL, get(Cond[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_SET)
+ BuildMI(&MBB, DL, get(PPC::BC)).addOperand(Cond[1]).addMBB(TBB);
+ else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)
+ BuildMI(&MBB, DL, get(PPC::BCn)).addOperand(Cond[1]).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(PPC::BCC))
+ .addImm(Cond[0].getImm()).addOperand(Cond[1]).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
+ return 2;
+}
+
+// Select analysis.
+bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles, int &FalseCycles) const {
+ if (!Subtarget.hasISEL())
+ return false;
+
+ if (Cond.size() != 2)
+ return false;
+
+ // If this is really a bdnz-like condition, then it cannot be turned into a
+ // select.
+ if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // isel is for regular integer GPRs only.
+ if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
+ !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
+ !PPC::G8RCRegClass.hasSubClassEq(RC) &&
+ !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
+ return false;
+
+ // FIXME: These numbers are for the A2, how well they work for other cores is
+ // an open question. On the A2, the isel instruction has a 2-cycle latency
+ // but single-cycle throughput. These numbers are used in combination with
+ // the MispredictPenalty setting from the active SchedMachineModel.
+ CondCycles = 1;
+ TrueCycles = 1;
+ FalseCycles = 1;
+
+ return true;
+}
+
+void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc dl,
+ unsigned DestReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ assert(Cond.size() == 2 &&
+ "PPC branch conditions have two components!");
+
+ assert(Subtarget.hasISEL() &&
+ "Cannot insert select on target without ISEL support");
+
+ // Get the register classes.
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ assert(RC && "TrueReg and FalseReg must have overlapping register classes");
+
+ bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
+ assert((Is64Bit ||
+ PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
+ "isel is for regular integer GPRs only");
+
+ unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
+ unsigned SelectPred = Cond[0].getImm();
+
+ unsigned SubIdx;
+ bool SwapOps;
+ switch (SelectPred) {
+ default: llvm_unreachable("invalid predicate for isel");
+ case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
+ case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
+ }
+
+ unsigned FirstReg = SwapOps ? FalseReg : TrueReg,
+ SecondReg = SwapOps ? TrueReg : FalseReg;
+
+ // The first input register of isel cannot be r0. If it is a member
+ // of a register class that can be r0, then copy it first (the
+ // register allocator should eliminate the copy).
+ if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||
+ MRI.getRegClass(FirstReg)->contains(PPC::X0)) {
+ const TargetRegisterClass *FirstRC =
+ MRI.getRegClass(FirstReg)->contains(PPC::X0) ?
+ &PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;
+ unsigned OldFirstReg = FirstReg;
+ FirstReg = MRI.createVirtualRegister(FirstRC);
+ BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)
+ .addReg(OldFirstReg);
+ }
+
+ BuildMI(MBB, MI, dl, get(OpCode), DestReg)
+ .addReg(FirstReg).addReg(SecondReg)
+ .addReg(Cond[1].getReg(), 0, SubIdx);
+}
+
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // We can end up with self copies and similar things as a result of VSX copy
+ // legalization. Promote them here.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ if (PPC::F8RCRegClass.contains(DestReg) &&
+ PPC::VSLRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(DestReg) &&
+ PPC::VSHRCRegClass.contains(SrcReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && SrcReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ DestReg = SuperReg;
+ } else if (PPC::F8RCRegClass.contains(SrcReg) &&
+ PPC::VSLRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ } else if (PPC::VRRCRegClass.contains(SrcReg) &&
+ PPC::VSHRCRegClass.contains(DestReg)) {
+ unsigned SuperReg =
+ TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
+
+ if (VSXSelfCopyCrash && DestReg == SuperReg)
+ llvm_unreachable("nop VSX copy");
+
+ SrcReg = SuperReg;
+ }
+
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))
+ // There are two different ways this can be done:
+ // 1. xxlor : This has lower latency (on the P7), 2 cycles, but can only
+ // issue in VSU pipeline 0.
+ // 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but
+ // can go to either pipeline.
+ // We'll always use xxlor here, because in practically all cases where
+ // copies are generated, they are close enough to some use that the
+ // lower-latency form is preferable.
+ Opc = PPC::XXLOR;
+ else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::XXLORf;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const MCInstrDesc &MCID = get(Opc);
+ if (MCID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, MCID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+// This function returns true if a CR spill is necessary and false otherwise.
+bool
+PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill,
+ int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional store instructions are added here,
+ // update isStoreToStackSlot.
+
+ DebugLoc DL;
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STW))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFD))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STFS))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CR))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_CRBIT))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ return true;
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STVX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE))
+ .addReg(SrcReg,
+ getKillRegState(isKill)),
+ FrameIdx));
+ SpillsVRS = true;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
+ FuncInfo->setSpillsCR();
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+bool
+PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const{
+ // Note: If additional load instructions are added here,
+ // update isLoadFromStackSlot.
+
+ if (PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LWZ),
+ DestReg), FrameIdx));
+ } else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LD), DestReg),
+ FrameIdx));
+ } else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFD), DestReg),
+ FrameIdx));
+ } else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LFS), DestReg),
+ FrameIdx));
+ } else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CR), DestReg),
+ FrameIdx));
+ return true;
+ } else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_CRBIT), DestReg),
+ FrameIdx));
+ return true;
+ } else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LVX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
+ FrameIdx));
+ NonRI = true;
+ } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
+ assert(Subtarget.isDarwin() &&
+ "VRSAVE only needs spill/restore on Darwin");
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL,
+ get(PPC::RESTORE_VRSAVE),
+ DestReg),
+ FrameIdx));
+ SpillsVRS = true;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ return false;
+}
+
+void
+PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ SmallVector<MachineInstr*, 4> NewMIs;
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasSpills();
+
+ bool NonRI = false, SpillsVRS = false;
+ if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
+ NonRI, SpillsVRS))
+ FuncInfo->setSpillsCR();
+
+ if (SpillsVRS)
+ FuncInfo->setSpillsVRSAVE();
+
+ if (NonRI)
+ FuncInfo->setHasNonRISpills();
+
+ for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
+ MBB.insert(MI, NewMIs[i]);
+
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
+ NewMIs.back()->addMemOperand(MF, MMO);
+}
+
+bool PPCInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
+ if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
+ Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
+ else
+ // Leave the CR# the same, but invert the condition.
+ Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));
+ return false;
+}
+
+bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const {
+ // For some instructions, it is legal to fold ZERO into the RA register field.
+ // A zero immediate should always be loaded with a single li.
+ unsigned DefOpc = DefMI->getOpcode();
+ if (DefOpc != PPC::LI && DefOpc != PPC::LI8)
+ return false;
+ if (!DefMI->getOperand(1).isImm())
+ return false;
+ if (DefMI->getOperand(1).getImm() != 0)
+ return false;
+
+ // Note that we cannot here invert the arguments of an isel in order to fold
+ // a ZERO into what is presented as the second argument. All we have here
+ // is the condition bit, and that might come from a CR-logical bit operation.
+
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
+
+ // Only fold into real machine instructions.
+ if (UseMCID.isPseudo())
+ return false;
+
+ unsigned UseIdx;
+ for (UseIdx = 0; UseIdx < UseMI->getNumOperands(); ++UseIdx)
+ if (UseMI->getOperand(UseIdx).isReg() &&
+ UseMI->getOperand(UseIdx).getReg() == Reg)
+ break;
+
+ assert(UseIdx < UseMI->getNumOperands() && "Cannot find Reg in UseMI");
+ assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");
+
+ const MCOperandInfo *UseInfo = &UseMCID.OpInfo[UseIdx];
+
+ // We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX0
+ // register (which might also be specified as a pointer class kind).
+ if (UseInfo->isLookupPtrRegClass()) {
+ if (UseInfo->RegClass /* Kind */ != 1)
+ return false;
+ } else {
+ if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&
+ UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)
+ return false;
+ }
+
+ // Make sure this is not tied to an output register (or otherwise
+ // constrained). This is true for ST?UX registers, for example, which
+ // are tied to their output registers.
+ if (UseInfo->Constraints != 0)
+ return false;
+
+ unsigned ZeroReg;
+ if (UseInfo->isLookupPtrRegClass()) {
+ bool isPPC64 = Subtarget.isPPC64();
+ ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;
+ } else {
+ ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?
+ PPC::ZERO8 : PPC::ZERO;
+ }
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI->getOperand(UseIdx).setReg(ZeroReg);
+
+ if (DeleteDef)
+ DefMI->eraseFromParent();
+
+ return true;
+}
+
+static bool MBBDefinesCTR(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I)
+ if (I->definesRegister(PPC::CTR) || I->definesRegister(PPC::CTR8))
+ return true;
+ return false;
+}
+
+// We should make sure that, if we're going to predicate both sides of a
+// condition (a diamond), that both sides don't define the counter register. We
+// can predicate counter-decrement-based branches, but while that predicates
+// the branching, it does not predicate the counter decrement. If we tried to
+// merge the triangle into one predicated block, we'd decrement the counter
+// twice.
+bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const {
+ return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));
+}
+
+
+bool PPCInstrInfo::isPredicated(const MachineInstr *MI) const {
+ // The predicated branches are identified by their type, not really by the
+ // explicit presence of a predicate. Furthermore, some of them can be
+ // predicated more than once. Because if conversion won't try to predicate
+ // any instruction which already claims to be predicated (by returning true
+ // here), always return false. In doing so, we let isPredicable() be the
+ // final word on whether not the instruction can be (further) predicated.
+
+ return false;
+}
+
+bool PPCInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
+ if (!MI->isTerminator())
+ return false;
+
+ // Conditional branch is a special case.
+ if (MI->isBranch() && !MI->isBarrier())
+ return true;
+
+ return !isPredicated(MI);
+}
+
+bool PPCInstrInfo::PredicateInstruction(
+ MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned OpC = MI->getOpcode();
+ if (OpC == PPC::BLR) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+ bool isPPC64 = Subtarget.isPPC64();
+ MI->setDesc(get(Pred[0].getImm() ?
+ (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) :
+ (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MI->setDesc(get(PPC::BCLR));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(PPC::BCLRn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ } else {
+ MI->setDesc(get(PPC::BCCLR));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ }
+
+ return true;
+ } else if (OpC == PPC::B) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {
+ bool isPPC64 = Subtarget.isPPC64();
+ MI->setDesc(get(Pred[0].getImm() ?
+ (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (isPPC64 ? PPC::BDZ8 : PPC::BDZ)));
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BC));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BCn));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ } else {
+ MachineBasicBlock *MBB = MI->getOperand(0).getMBB();
+ MI->RemoveOperand(0);
+
+ MI->setDesc(get(PPC::BCC));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg())
+ .addMBB(MBB);
+ }
+
+ return true;
+ } else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 ||
+ OpC == PPC::BCTRL || OpC == PPC::BCTRL8) {
+ if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)
+ llvm_unreachable("Cannot predicate bctr[l] on the ctr register");
+
+ bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8;
+ bool isPPC64 = Subtarget.isPPC64();
+
+ if (Pred[0].getImm() == PPC::PRED_BIT_SET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) :
+ (setLR ? PPC::BCCTRL : PPC::BCCTR)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ } else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n) :
+ (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8) :
+ (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));
+ MachineInstrBuilder(*MI->getParent()->getParent(), MI)
+ .addImm(Pred[0].getImm())
+ .addReg(Pred[1].getReg());
+ return true;
+ }
+
+ return false;
+}
+
+bool PPCInstrInfo::SubsumesPredicate(
+ const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ assert(Pred1.size() == 2 && "Invalid PPC first predicate");
+ assert(Pred2.size() == 2 && "Invalid PPC second predicate");
+
+ if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)
+ return false;
+ if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
+ return false;
+
+ // P1 can only subsume P2 if they test the same condition register.
+ if (Pred1[1].getReg() != Pred2[1].getReg())
+ return false;
+
+ PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
+ PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
+
+ if (P1 == P2)
+ return true;
+
+ // Does P1 subsume P2, e.g. GE subsumes GT.
+ if (P1 == PPC::PRED_LE &&
+ (P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))
+ return true;
+ if (P1 == PPC::PRED_GE &&
+ (P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))
+ return true;
+
+ return false;
+}
+
+bool PPCInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // Note: At the present time, the contents of Pred from this function is
+ // unused by IfConversion. This implementation follows ARM by pushing the
+ // CR-defining operand. Because the 'DZ' and 'DNZ' count as types of
+ // predicate, instructions defining CTR or CTR8 are also included as
+ // predicate-defining instructions.
+
+ const TargetRegisterClass *RCs[] =
+ { &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,
+ &PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
+ const TargetRegisterClass *RC = RCs[c];
+ if (MO.isReg()) {
+ if (MO.isDef() && RC->contains(MO.getReg())) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ } else if (MO.isRegMask()) {
+ for (TargetRegisterClass::iterator I = RC->begin(),
+ IE = RC->end(); I != IE; ++I)
+ if (MO.clobbersPhysReg(*I)) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+ }
+ }
+
+ return Found;
+}
+
+bool PPCInstrInfo::isPredicable(MachineInstr *MI) const {
+ unsigned OpC = MI->getOpcode();
+ switch (OpC) {
+ default:
+ return false;
+ case PPC::B:
+ case PPC::BLR:
+ case PPC::BCTR:
+ case PPC::BCTR8:
+ case PPC::BCTRL:
+ case PPC::BCTRL8:
+ return true;
+ }
+}
+
+bool PPCInstrInfo::analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const {
+ unsigned Opc = MI->getOpcode();
+
+ switch (Opc) {
+ default: return false;
+ case PPC::CMPWI:
+ case PPC::CMPLWI:
+ case PPC::CMPDI:
+ case PPC::CMPLDI:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ Value = MI->getOperand(2).getImm();
+ Mask = 0xFFFF;
+ return true;
+ case PPC::CMPW:
+ case PPC::CMPLW:
+ case PPC::CMPD:
+ case PPC::CMPLD:
+ case PPC::FCMPUS:
+ case PPC::FCMPUD:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ return true;
+ }
+}
+
+bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const {
+ if (DisableCmpOpt)
+ return false;
+
+ int OpC = CmpInstr->getOpcode();
+ unsigned CRReg = CmpInstr->getOperand(0).getReg();
+
+ // FP record forms set CR1 based on the execption status bits, not a
+ // comparison with zero.
+ if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)
+ return false;
+
+ // The record forms set the condition register based on a signed comparison
+ // with zero (so says the ISA manual). This is not as straightforward as it
+ // seems, however, because this is always a 64-bit comparison on PPC64, even
+ // for instructions that are 32-bit in nature (like slw for example).
+ // So, on PPC32, for unsigned comparisons, we can use the record forms only
+ // for equality checks (as those don't depend on the sign). On PPC64,
+ // we are restricted to equality for unsigned 64-bit comparisons and for
+ // signed 32-bit comparisons the applicability is more restricted.
+ bool isPPC64 = Subtarget.isPPC64();
+ bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;
+ bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;
+ bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;
+
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI) return false;
+ int MIOpC = MI->getOpcode();
+
+ bool equalityOnly = false;
+ bool noSub = false;
+ if (isPPC64) {
+ if (is32BitSignedCompare) {
+ // We can perform this optimization only if MI is sign-extending.
+ if (MIOpC == PPC::SRAW || MIOpC == PPC::SRAWo ||
+ MIOpC == PPC::SRAWI || MIOpC == PPC::SRAWIo ||
+ MIOpC == PPC::EXTSB || MIOpC == PPC::EXTSBo ||
+ MIOpC == PPC::EXTSH || MIOpC == PPC::EXTSHo ||
+ MIOpC == PPC::EXTSW || MIOpC == PPC::EXTSWo) {
+ noSub = true;
+ } else
+ return false;
+ } else if (is32BitUnsignedCompare) {
+ // We can perform this optimization, equality only, if MI is
+ // zero-extending.
+ if (MIOpC == PPC::CNTLZW || MIOpC == PPC::CNTLZWo ||
+ MIOpC == PPC::SLW || MIOpC == PPC::SLWo ||
+ MIOpC == PPC::SRW || MIOpC == PPC::SRWo) {
+ noSub = true;
+ equalityOnly = true;
+ } else
+ return false;
+ } else
+ equalityOnly = is64BitUnsignedCompare;
+ } else
+ equalityOnly = is32BitUnsignedCompare;
+
+ if (equalityOnly) {
+ // We need to check the uses of the condition register in order to reject
+ // non-equality comparisons.
+ for (MachineRegisterInfo::use_instr_iterator I =MRI->use_instr_begin(CRReg),
+ IE = MRI->use_instr_end(); I != IE; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->getOpcode() == PPC::BCC) {
+ unsigned Pred = UseMI->getOperand(0).getImm();
+ if (Pred != PPC::PRED_EQ && Pred != PPC::PRED_NE)
+ return false;
+ } else if (UseMI->getOpcode() == PPC::ISEL ||
+ UseMI->getOpcode() == PPC::ISEL8) {
+ unsigned SubIdx = UseMI->getOperand(3).getSubReg();
+ if (SubIdx != PPC::sub_eq)
+ return false;
+ } else
+ return false;
+ }
+ }
+
+ MachineBasicBlock::iterator I = CmpInstr;
+
+ // Scan forward to find the first use of the compare.
+ for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
+ I != EL; ++I) {
+ bool FoundUse = false;
+ for (MachineRegisterInfo::use_instr_iterator J =MRI->use_instr_begin(CRReg),
+ JE = MRI->use_instr_end(); J != JE; ++J)
+ if (&*J == &*I) {
+ FoundUse = true;
+ break;
+ }
+
+ if (FoundUse)
+ break;
+ }
+
+ // There are two possible candidates which can be changed to set CR[01].
+ // One is MI, the other is a SUB instruction.
+ // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
+ MachineInstr *Sub = nullptr;
+ if (SrcReg2 != 0)
+ // MI is not a candidate for CMPrr.
+ MI = nullptr;
+ // FIXME: Conservatively refuse to convert an instruction which isn't in the
+ // same BB as the comparison. This is to allow the check below to avoid calls
+ // (and other explicit clobbers); instead we should really check for these
+ // more explicitly (in at least a few predecessors).
+ else if (MI->getParent() != CmpInstr->getParent() || Value != 0) {
+ // PPC does not have a record-form SUBri.
+ return false;
+ }
+
+ // Search for Sub.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ --I;
+
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator E = MI,
+ B = CmpInstr->getParent()->begin();
+
+ for (; I != E && !noSub; --I) {
+ const MachineInstr &Instr = *I;
+ unsigned IOpC = Instr.getOpcode();
+
+ if (&*I != CmpInstr && (
+ Instr.modifiesRegister(PPC::CR0, TRI) ||
+ Instr.readsRegister(PPC::CR0, TRI)))
+ // This instruction modifies or uses the record condition register after
+ // the one we want to change. While we could do this transformation, it
+ // would likely not be profitable. This transformation removes one
+ // instruction, and so even forcing RA to generate one move probably
+ // makes it unprofitable.
+ return false;
+
+ // Check whether CmpInstr can be made redundant by the current instruction.
+ if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||
+ OpC == PPC::CMPD || OpC == PPC::CMPLD) &&
+ (IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&
+ ((Instr.getOperand(1).getReg() == SrcReg &&
+ Instr.getOperand(2).getReg() == SrcReg2) ||
+ (Instr.getOperand(1).getReg() == SrcReg2 &&
+ Instr.getOperand(2).getReg() == SrcReg))) {
+ Sub = &*I;
+ break;
+ }
+
+ if (I == B)
+ // The 'and' is below the comparison instruction.
+ return false;
+ }
+
+ // Return false if no candidates exist.
+ if (!MI && !Sub)
+ return false;
+
+ // The single candidate is called MI.
+ if (!MI) MI = Sub;
+
+ int NewOpC = -1;
+ MIOpC = MI->getOpcode();
+ if (MIOpC == PPC::ANDIo || MIOpC == PPC::ANDIo8)
+ NewOpC = MIOpC;
+ else {
+ NewOpC = PPC::getRecordFormOpcode(MIOpC);
+ if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)
+ NewOpC = MIOpC;
+ }
+
+ // FIXME: On the non-embedded POWER architectures, only some of the record
+ // forms are fast, and we should use only the fast ones.
+
+ // The defining instruction has a record form (or is already a record
+ // form). It is possible, however, that we'll need to reverse the condition
+ // code of the users.
+ if (NewOpC == -1)
+ return false;
+
+ SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;
+ SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;
+
+ // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP
+ // needs to be updated to be based on SUB. Push the condition code
+ // operands to OperandsToUpdate. If it is safe to remove CmpInstr, the
+ // condition code of these operands will be modified.
+ bool ShouldSwap = false;
+ if (Sub) {
+ ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
+ Sub->getOperand(2).getReg() == SrcReg;
+
+ // The operands to subf are the opposite of sub, so only in the fixed-point
+ // case, invert the order.
+ ShouldSwap = !ShouldSwap;
+ }
+
+ if (ShouldSwap)
+ for (MachineRegisterInfo::use_instr_iterator
+ I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
+ I != IE; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->getOpcode() == PPC::BCC) {
+ PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
+ assert((!equalityOnly ||
+ Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE) &&
+ "Invalid predicate for equality-only optimization");
+ PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
+ PPC::getSwappedPredicate(Pred)));
+ } else if (UseMI->getOpcode() == PPC::ISEL ||
+ UseMI->getOpcode() == PPC::ISEL8) {
+ unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
+ assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
+ "Invalid CR bit for equality-only optimization");
+
+ if (NewSubReg == PPC::sub_lt)
+ NewSubReg = PPC::sub_gt;
+ else if (NewSubReg == PPC::sub_gt)
+ NewSubReg = PPC::sub_lt;
+
+ SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
+ NewSubReg));
+ } else // We need to abort on a user we don't understand.
+ return false;
+ }
+
+ // Create a new virtual register to hold the value of the CR set by the
+ // record-form instruction. If the instruction was not previously in
+ // record form, then set the kill flag on the CR.
+ CmpInstr->eraseFromParent();
+
+ MachineBasicBlock::iterator MII = MI;
+ BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
+ get(TargetOpcode::COPY), CRReg)
+ .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);
+
+ if (MIOpC != NewOpC) {
+ // We need to be careful here: we're replacing one instruction with
+ // another, and we need to make sure that we get all of the right
+ // implicit uses and defs. On the other hand, the caller may be holding
+ // an iterator to this instruction, and so we can't delete it (this is
+ // specifically the case if this is the instruction directly after the
+ // compare).
+
+ const MCInstrDesc &NewDesc = get(NewOpC);
+ MI->setDesc(NewDesc);
+
+ if (NewDesc.ImplicitDefs)
+ for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs();
+ *ImpDefs; ++ImpDefs)
+ if (!MI->definesRegister(*ImpDefs))
+ MI->addOperand(*MI->getParent()->getParent(),
+ MachineOperand::CreateReg(*ImpDefs, true, true));
+ if (NewDesc.ImplicitUses)
+ for (const uint16_t *ImpUses = NewDesc.getImplicitUses();
+ *ImpUses; ++ImpUses)
+ if (!MI->readsRegister(*ImpUses))
+ MI->addOperand(*MI->getParent()->getParent(),
+ MachineOperand::CreateReg(*ImpUses, false, true));
+ }
+
+ // Modify the condition code of operands in OperandsToUpdate.
+ // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
+ // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
+ for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
+ PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);
+
+ for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
+ SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);
+
+ return true;
+}
+
+/// GetInstSize - Return the number of bytes of code the specified
+/// instruction may be. This returns the maximum number of bytes.
+///
+unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ unsigned Opcode = MI->getOpcode();
+
+ if (Opcode == PPC::INLINEASM) {
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const char *AsmStr = MI->getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ } else {
+ const MCInstrDesc &Desc = get(Opcode);
+ return Desc.getSize();
+ }
+}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-fma-mutate"
+
+namespace {
+ // PPCVSXFMAMutate pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXFMAMutate : public MachineFunctionPass {
+ static char ID;
+ PPCVSXFMAMutate() : MachineFunctionPass(ID) {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ }
+
+ LiveIntervals *LIS;
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+
+ // The default (A-type) VSX FMA form kills the addend (it is taken from
+ // the target register, which is then updated to reflect the result of
+ // the FMA). If the instruction, however, kills one of the registers
+ // used for the product, then we can use the M-form instruction (which
+ // will take that value from the to-be-defined register).
+
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ if (AltOpc == -1)
+ continue;
+
+ // This pass is run after register coalescing, and so we're looking for
+ // a situation like this:
+ // ...
+ // %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // ...
+ // %vreg9<def,tied1> = XSMADDADP %vreg9<tied0>, %vreg17, %vreg19,
+ // %RM<imp-use>; VSLRC:%vreg9,%vreg17,%vreg19
+ // ...
+ // Where we can eliminate the copy by changing from the A-type to the
+ // M-type instruction. Specifically, for this example, this means:
+ // %vreg5<def,tied1> = XSMADDADP %vreg5<tied0>, %vreg17, %vreg16,
+ // %RM<imp-use>; VSLRC:%vreg5,%vreg17,%vreg16
+ // is replaced by:
+ // %vreg16<def,tied1> = XSMADDMDP %vreg16<tied0>, %vreg18, %vreg9,
+ // %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
+ // and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
+
+ SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
+
+ VNInfo *AddendValNo =
+ LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+ MachineInstr *AddendMI = LIS->getInstructionFromIndex(AddendValNo->def);
+
+ // The addend and this instruction must be in the same block.
+
+ if (!AddendMI || AddendMI->getParent() != MI->getParent())
+ continue;
+
+ // The addend must be a full copy within the same register class.
+
+ if (!AddendMI->isFullCopy())
+ continue;
+
+ unsigned AddendSrcReg = AddendMI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(AddendSrcReg)) {
+ if (MRI.getRegClass(AddendMI->getOperand(0).getReg()) !=
+ MRI.getRegClass(AddendSrcReg))
+ continue;
+ } else {
+ // If AddendSrcReg is a physical register, make sure the destination
+ // register class contains it.
+ if (!MRI.getRegClass(AddendMI->getOperand(0).getReg())
+ ->contains(AddendSrcReg))
+ continue;
+ }
+
+ // In theory, there could be other uses of the addend copy before this
+ // fma. We could deal with this, but that would require additional
+ // logic below and I suspect it will not occur in any relevant
+ // situations.
+ bool OtherUsers = false;
+ for (auto J = std::prev(I), JE = MachineBasicBlock::iterator(AddendMI);
+ J != JE; --J)
+ if (J->readsVirtualRegister(AddendMI->getOperand(0).getReg())) {
+ OtherUsers = true;
+ break;
+ }
+
+ if (OtherUsers)
+ continue;
+
+ // Find one of the product operands that is killed by this instruction.
+
+ unsigned KilledProdOp = 0, OtherProdOp = 0;
+ if (LIS->getInterval(MI->getOperand(2).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 2;
+ OtherProdOp = 3;
+ } else if (LIS->getInterval(MI->getOperand(3).getReg())
+ .Query(FMAIdx).isKill()) {
+ KilledProdOp = 3;
+ OtherProdOp = 2;
+ }
+
+ // If there are no killed product operands, then this transformation is
+ // likely not profitable.
+ if (!KilledProdOp)
+ continue;
+
+ // In order to replace the addend here with the source of the copy,
+ // it must still be live here.
+ if (!LIS->getInterval(AddendMI->getOperand(1).getReg()).liveAt(FMAIdx))
+ continue;
+
+ // Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
+
+ unsigned AddReg = AddendMI->getOperand(1).getReg();
+ unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
+ unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
+
+ unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
+ unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
+ unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
+
+ bool AddRegKill = AddendMI->getOperand(1).isKill();
+ bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
+ bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
+
+ bool AddRegUndef = AddendMI->getOperand(1).isUndef();
+ bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
+ bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
+
+ unsigned OldFMAReg = MI->getOperand(0).getReg();
+
+ assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
+ "Addend copy not tied to old FMA output!");
+
+ DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+
+ MI->getOperand(0).setReg(KilledProdReg);
+ MI->getOperand(1).setReg(KilledProdReg);
+ MI->getOperand(3).setReg(AddReg);
+ MI->getOperand(2).setReg(OtherProdReg);
+
+ MI->getOperand(0).setSubReg(KilledProdSubReg);
+ MI->getOperand(1).setSubReg(KilledProdSubReg);
+ MI->getOperand(3).setSubReg(AddSubReg);
+ MI->getOperand(2).setSubReg(OtherProdSubReg);
+
+ MI->getOperand(1).setIsKill(KilledProdRegKill);
+ MI->getOperand(3).setIsKill(AddRegKill);
+ MI->getOperand(2).setIsKill(OtherProdRegKill);
+
+ MI->getOperand(1).setIsUndef(KilledProdRegUndef);
+ MI->getOperand(3).setIsUndef(AddRegUndef);
+ MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+
+ MI->setDesc(TII->get(AltOpc));
+
+ DEBUG(dbgs() << " -> " << *MI);
+
+ // The killed product operand was killed here, so we can reuse it now
+ // for the result of the fma.
+
+ LiveInterval &FMAInt = LIS->getInterval(OldFMAReg);
+ VNInfo *FMAValNo = FMAInt.getVNInfoAt(FMAIdx.getRegSlot());
+ for (auto UI = MRI.reg_nodbg_begin(OldFMAReg), UE = MRI.reg_nodbg_end();
+ UI != UE;) {
+ MachineOperand &UseMO = *UI;
+ MachineInstr *UseMI = UseMO.getParent();
+ ++UI;
+
+ // Don't replace the result register of the copy we're about to erase.
+ if (UseMI == AddendMI)
+ continue;
+
+ UseMO.setReg(KilledProdReg);
+ UseMO.setSubReg(KilledProdSubReg);
+ }
+
+ // Extend the live intervals of the killed product operand to hold the
+ // fma result.
+
+ LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
+ for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
+ AI != AE; ++AI) {
+ // Don't add the segment that corresponds to the original copy.
+ if (AI->valno == AddendValNo)
+ continue;
+
+ VNInfo *NewFMAValNo =
+ NewFMAInt.getNextValue(AI->start,
+ LIS->getVNInfoAllocator());
+
+ NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
+ NewFMAValNo));
+ }
+ DEBUG(dbgs() << " extended: " << NewFMAInt << '\n');
+
+ FMAInt.removeValNo(FMAValNo);
+ DEBUG(dbgs() << " trimmed: " << FMAInt << '\n');
+
+ // Remove the (now unused) copy.
+
+ DEBUG(dbgs() << " removing: " << *AddendMI << '\n');
+ LIS->RemoveMachineInstrFromMaps(AddendMI);
+ AddendMI->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ // If we don't have VSX then go ahead and return without doing
+ // anything.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
+
+ LIS = &getAnalysis<LiveIntervals>();
+
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ if (DisableVSXFMAMutate)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
+ "PowerPC VSX FMA Mutation", false, false)
+
+char &llvm::PPCVSXFMAMutateID = PPCVSXFMAMutate::ID;
+
+char PPCVSXFMAMutate::ID = 0;
+FunctionPass*
+llvm::createPPCVSXFMAMutatePass() { return new PPCVSXFMAMutate(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy"
+
+namespace llvm {
+ void initializePPCVSXCopyPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopy pass - For copies between VSX registers and non-VSX registers
+ // (Altivec and scalar floating-point registers), we need to transform the
+ // copies into subregister copies with other restrictions.
+ struct PPCVSXCopy : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopy() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+ bool IsRegInClass(unsigned Reg, const TargetRegisterClass *RC,
+ MachineRegisterInfo &MRI) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ return RC->hasSubClassEq(MRI.getRegClass(Reg));
+ } else if (RC->contains(Reg)) {
+ return true;
+ }
+
+ return false;
+ }
+
+ bool IsVSReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VSRCRegClass, MRI);
+ }
+
+ bool IsVRReg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::VRRCRegClass, MRI);
+ }
+
+ bool IsF8Reg(unsigned Reg, MachineRegisterInfo &MRI) {
+ return IsRegInClass(Reg, &PPC::F8RCRegClass, MRI);
+ }
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (!MI->isFullCopy())
+ continue;
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ if ( IsVSReg(DstMO.getReg(), MRI) &&
+ !IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *to* a VSX register from a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *SrcRC =
+ IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(SrcMO.getReg(), MRI) ||
+ IsVRReg(SrcMO.getReg(), MRI)) &&
+ "Unknown source for a VSX copy");
+
+ unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+
+ // The source of the original copy is now the new virtual register.
+ SrcMO.setReg(NewVReg);
+ } else if (!IsVSReg(DstMO.getReg(), MRI) &&
+ IsVSReg(SrcMO.getReg(), MRI)) {
+ // This is a copy *from* a VSX register to a non-VSX register.
+ Changed = true;
+
+ const TargetRegisterClass *DstRC =
+ IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
+ &PPC::VSLRCRegClass;
+ assert((IsF8Reg(DstMO.getReg(), MRI) ||
+ IsVRReg(DstMO.getReg(), MRI)) &&
+ "Unknown destination for a VSX copy");
+
+ // Copy the VSX value into a new VSX register of the correct subclass.
+ unsigned NewVReg = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg)
+ .addOperand(SrcMO);
+
+ // Transform the original copy into a subregister extraction copy.
+ SrcMO.setReg(NewVReg);
+ SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
+ PPC::sub_64);
+ }
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ // If we don't have VSX on the subtarget, don't do anything.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopy, DEBUG_TYPE,
+ "PowerPC VSX Copy Legalization", false, false)
+
+char PPCVSXCopy::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyPass() { return new PPCVSXCopy(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-vsx-copy-cleanup"
+
+namespace llvm {
+ void initializePPCVSXCopyCleanupPass(PassRegistry&);
+}
+
+namespace {
+ // PPCVSXCopyCleanup pass - We sometimes end up generating self copies of VSX
+ // registers (mostly because the ABI code still places all values into the
+ // "traditional" floating-point and vector registers). Remove them here.
+ struct PPCVSXCopyCleanup : public MachineFunctionPass {
+ static char ID;
+ PPCVSXCopyCleanup() : MachineFunctionPass(ID) {
+ initializePPCVSXCopyCleanupPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+
+ SmallVector<MachineInstr *, 4> ToDelete;
+ for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
+ I != IE; ++I) {
+ MachineInstr *MI = I;
+ if (MI->getOpcode() == PPC::XXLOR &&
+ MI->getOperand(0).getReg() == MI->getOperand(1).getReg() &&
+ MI->getOperand(0).getReg() == MI->getOperand(2).getReg())
+ ToDelete.push_back(MI);
+ }
+
+ if (!ToDelete.empty())
+ Changed = true;
+
+ for (unsigned i = 0, ie = ToDelete.size(); i != ie; ++i) {
+ DEBUG(dbgs() << "Removing VSX self-copy: " << *ToDelete[i]);
+ ToDelete[i]->eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ // If we don't have VSX don't bother doing anything here.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCVSXCopyCleanup, DEBUG_TYPE,
+ "PowerPC VSX Copy Cleanup", false, false)
+
+char PPCVSXCopyCleanup::ID = 0;
+FunctionPass*
+llvm::createPPCVSXCopyCleanupPass() { return new PPCVSXCopyCleanup(); }
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "ppc-early-ret"
+STATISTIC(NumBCLR, "Number of early conditional returns");
+STATISTIC(NumBLR, "Number of early returns");
+
+namespace llvm {
+ void initializePPCEarlyReturnPass(PassRegistry&);
+}
+
+namespace {
+ // PPCEarlyReturn pass - For simple functions without epilogue code, move
+ // returns up, and create conditional returns, to avoid unnecessary
+ // branch-to-blr sequences.
+ struct PPCEarlyReturn : public MachineFunctionPass {
+ static char ID;
+ PPCEarlyReturn() : MachineFunctionPass(ID) {
+ initializePPCEarlyReturnPass(*PassRegistry::getPassRegistry());
+ }
+
+ const PPCTargetMachine *TM;
+ const PPCInstrInfo *TII;
+
+protected:
+ bool processBlock(MachineBasicBlock &ReturnMBB) {
+ bool Changed = false;
+
+ MachineBasicBlock::iterator I = ReturnMBB.begin();
+ I = ReturnMBB.SkipPHIsAndLabels(I);
+
+ // The block must be essentially empty except for the blr.
+ if (I == ReturnMBB.end() || I->getOpcode() != PPC::BLR ||
+ I != ReturnMBB.getLastNonDebugInstr())
+ return Changed;
+
+ SmallVector<MachineBasicBlock*, 8> PredToRemove;
+ for (MachineBasicBlock::pred_iterator PI = ReturnMBB.pred_begin(),
+ PIE = ReturnMBB.pred_end(); PI != PIE; ++PI) {
+ bool OtherReference = false, BlockChanged = false;
+ for (MachineBasicBlock::iterator J = (*PI)->getLastNonDebugInstr();;) {
+ if (J->getOpcode() == PPC::B) {
+ if (J->getOperand(0).getMBB() == &ReturnMBB) {
+ // This is an unconditional branch to the return. Replace the
+ // branch with a blr.
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BLR));
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BCC) {
+ if (J->getOperand(2).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR))
+ .addImm(J->getOperand(0).getImm())
+ .addReg(J->getOperand(1).getReg());
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->getOpcode() == PPC::BC || J->getOpcode() == PPC::BCn) {
+ if (J->getOperand(1).getMBB() == &ReturnMBB) {
+ // This is a conditional branch to the return. Replace the branch
+ // with a bclr.
+ BuildMI(**PI, J, J->getDebugLoc(),
+ TII->get(J->getOpcode() == PPC::BC ?
+ PPC::BCLR : PPC::BCLRn))
+ .addReg(J->getOperand(0).getReg());
+ MachineBasicBlock::iterator K = J--;
+ K->eraseFromParent();
+ BlockChanged = true;
+ ++NumBCLR;
+ continue;
+ }
+ } else if (J->isBranch()) {
+ if (J->isIndirectBranch()) {
+ if (ReturnMBB.hasAddressTaken())
+ OtherReference = true;
+ } else
+ for (unsigned i = 0; i < J->getNumOperands(); ++i)
+ if (J->getOperand(i).isMBB() &&
+ J->getOperand(i).getMBB() == &ReturnMBB)
+ OtherReference = true;
+ } else if (!J->isTerminator() && !J->isDebugValue())
+ break;
+
+ if (J == (*PI)->begin())
+ break;
+
+ --J;
+ }
+
+ if ((*PI)->canFallThrough() && (*PI)->isLayoutSuccessor(&ReturnMBB))
+ OtherReference = true;
+
+ // Predecessors are stored in a vector and can't be removed here.
+ if (!OtherReference && BlockChanged) {
+ PredToRemove.push_back(*PI);
+ }
+
+ if (BlockChanged)
+ Changed = true;
+ }
+
+ for (unsigned i = 0, ie = PredToRemove.size(); i != ie; ++i)
+ PredToRemove[i]->removeSuccessor(&ReturnMBB);
+
+ if (Changed && !ReturnMBB.hasAddressTaken()) {
+ // We now might be able to merge this blr-only block into its
+ // by-layout predecessor.
+ if (ReturnMBB.pred_size() == 1 &&
+ (*ReturnMBB.pred_begin())->isLayoutSuccessor(&ReturnMBB)) {
+ // Move the blr into the preceding block.
+ MachineBasicBlock &PrevMBB = **ReturnMBB.pred_begin();
+ PrevMBB.splice(PrevMBB.end(), &ReturnMBB, I);
+ PrevMBB.removeSuccessor(&ReturnMBB);
+ }
+
+ if (ReturnMBB.pred_empty())
+ ReturnMBB.eraseFromParent();
+ }
+
+ return Changed;
+ }
+
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ TII = TM->getInstrInfo();
+
+ bool Changed = false;
+
+ // If the function does not have at least two blocks, then there is
+ // nothing to do.
+ if (MF.size() < 2)
+ return Changed;
+
+ for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
+ MachineBasicBlock &B = *I++;
+ if (processBlock(B))
+ Changed = true;
+ }
+
+ return Changed;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+INITIALIZE_PASS(PPCEarlyReturn, DEBUG_TYPE,
+ "PowerPC Early-Return Creation", false, false)
+
+char PPCEarlyReturn::ID = 0;
+FunctionPass*
+llvm::createPPCEarlyReturnPass() { return new PPCEarlyReturn(); }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
new file mode 100644
index 000000000000..83f14c6cf214
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -0,0 +1,235 @@
+//===-- PPCInstrInfo.h - PowerPC Instruction Information --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_INSTRUCTIONINFO_H
+#define POWERPC_INSTRUCTIONINFO_H
+
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
+namespace llvm {
+
+/// PPCII - This namespace holds all of the PowerPC target-specific
+/// per-instruction flags. These must match the corresponding definitions in
+/// PPC.td and PPCInstrFormats.td.
+namespace PPCII {
+enum {
+ // PPC970 Instruction Flags. These flags describe the characteristics of the
+ // PowerPC 970 (aka G5) dispatch groups and how they are formed out of
+ // raw machine instructions.
+
+ /// PPC970_First - This instruction starts a new dispatch group, so it will
+ /// always be the first one in the group.
+ PPC970_First = 0x1,
+
+ /// PPC970_Single - This instruction starts a new dispatch group and
+ /// terminates it, so it will be the sole instruction in the group.
+ PPC970_Single = 0x2,
+
+ /// PPC970_Cracked - This instruction is cracked into two pieces, requiring
+ /// two dispatch pipes to be available to issue.
+ PPC970_Cracked = 0x4,
+
+ /// PPC970_Mask/Shift - This is a bitmask that selects the pipeline type that
+ /// an instruction is issued to.
+ PPC970_Shift = 3,
+ PPC970_Mask = 0x07 << PPC970_Shift
+};
+enum PPC970_Unit {
+ /// These are the various PPC970 execution unit pipelines. Each instruction
+ /// is one of these.
+ PPC970_Pseudo = 0 << PPC970_Shift, // Pseudo instruction
+ PPC970_FXU = 1 << PPC970_Shift, // Fixed Point (aka Integer/ALU) Unit
+ PPC970_LSU = 2 << PPC970_Shift, // Load Store Unit
+ PPC970_FPU = 3 << PPC970_Shift, // Floating Point Unit
+ PPC970_CRU = 4 << PPC970_Shift, // Control Register Unit
+ PPC970_VALU = 5 << PPC970_Shift, // Vector ALU
+ PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
+ PPC970_BRU = 7 << PPC970_Shift // Branch Unit
+};
+} // end namespace PPCII
+
+
+class PPCInstrInfo : public PPCGenInstrInfo {
+ PPCSubtarget &Subtarget;
+ const PPCRegisterInfo RI;
+
+ bool StoreRegToStackSlot(MachineFunction &MF,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
+ bool LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs,
+ bool &NonRI, bool &SpillsVRS) const;
+ virtual void anchor();
+public:
+ explicit PPCInstrInfo(PPCSubtarget &STI);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const override;
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const override;
+
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const override;
+ int getOperandLatency(const InstrItineraryData *ItinData,
+ SDNode *DefNode, unsigned DefIdx,
+ SDNode *UseNode, unsigned UseIdx) const override {
+ return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
+ UseNode, UseIdx);
+ }
+
+ bool isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const override;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ // commuteInstruction - We can commute rlwimi instructions, but only if the
+ // rotate amt is zero. We also have to munge the immediates a bit.
+ MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
+
+ bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
+
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
+
+
+ // Branch analysis.
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
+
+ // Select analysis.
+ bool canInsertSelect(const MachineBasicBlock&,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned, unsigned, int&, int&, int&) const override;
+ void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const override;
+
+ // If conversion by predication (only supported by some branch instructions).
+ // All of the profitability checks always return true; it is always
+ // profitable to use the predicated branches.
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const override {
+ return true;
+ }
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const override;
+
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const override {
+ return true;
+ }
+
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const override {
+ return false;
+ }
+
+ // Predication support.
+ bool isPredicated(const MachineInstr *MI) const override;
+
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const override;
+
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const override;
+
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const override;
+
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
+
+ bool isPredicable(MachineInstr *MI) const override;
+
+ // Comparison optimization.
+
+
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
+
+ bool optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const override;
+
+ /// GetInstSize - Return the number of bytes of code the specified
+ /// instruction may be. This returns the maximum number of bytes.
+ ///
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
new file mode 100644
index 000000000000..636ac5d571fa
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -0,0 +1,3424 @@
+//===-- PPCInstrInfo.td - The PowerPC Instruction Set ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subset of the 32-bit PowerPC instruction set, as used
+// by the PowerPC instruction selector.
+//
+//===----------------------------------------------------------------------===//
+
+include "PPCInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific type constraints.
+//
+def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+def SDT_PPCvperm : SDTypeProfile<1, 3, [
+ SDTCisVT<3, v16i8>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>
+]>;
+
+def SDT_PPCvcmp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>
+]>;
+
+def SDT_PPCcondbr : SDTypeProfile<0, 3, [
+ SDTCisVT<0, i32>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClbrx : SDTypeProfile<1, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+def SDT_PPCstbrx : SDTypeProfile<0, 3, [
+ SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>
+]>;
+
+def SDT_PPClarx : SDTypeProfile<1, 1, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDT_PPCstcx : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDT_PPCTC_ret : SDTypeProfile<0, 2, [
+ SDTCisPtrTy<0>, SDTCisVT<1, i32>
+]>;
+
+def tocentry32 : Operand<iPTR> {
+ let MIOperandInfo = (ops i32imm:$imm);
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific DAG Nodes.
+//
+
+def PPCfre : SDNode<"PPCISD::FRE", SDTFPUnaryOp, []>;
+def PPCfrsqrte: SDNode<"PPCISD::FRSQRTE", SDTFPUnaryOp, []>;
+
+def PPCfcfid : SDNode<"PPCISD::FCFID", SDTFPUnaryOp, []>;
+def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>;
+def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>;
+def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>;
+def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>;
+def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>;
+def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>;
+def PPCfctiwuz: SDNode<"PPCISD::FCTIWUZ",SDTFPUnaryOp, []>;
+def PPCstfiwx : SDNode<"PPCISD::STFIWX", SDT_PPCstfiwx,
+ [SDNPHasChain, SDNPMayStore]>;
+def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
+ [SDNPHasChain, SDNPMayLoad]>;
+
+// Extract FPSCR (not modeled at the DAG level).
+def PPCmffs : SDNode<"PPCISD::MFFS",
+ SDTypeProfile<1, 0, [SDTCisVT<0, f64>]>, []>;
+
+// Perform FADD in round-to-zero mode.
+def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+
+
+def PPCfsel : SDNode<"PPCISD::FSEL",
+ // Type constraint for fsel.
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
+ SDTCisFP<0>, SDTCisVT<1, f64>]>, []>;
+
+def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>;
+def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>;
+def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
+def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
+
+def PPCppc32GOT : SDNode<"PPCISD::PPC32_GOT", SDTIntLeaf, []>;
+
+def PPCaddisGotTprelHA : SDNode<"PPCISD::ADDIS_GOT_TPREL_HA", SDTIntBinOp>;
+def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
+ [SDNPMayLoad]>;
+def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
+def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
+def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
+def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
+def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
+def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
+def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
+ [SDNPHasChain]>;
+def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
+
+def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
+
+// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift
+// amounts. These nodes are generated by the multi-precision shift code.
+def PPCsrl : SDNode<"PPCISD::SRL" , SDTIntShiftOp>;
+def PPCsra : SDNode<"PPCISD::SRA" , SDTIntShiftOp>;
+def PPCshl : SDNode<"PPCISD::SHL" , SDTIntShiftOp>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
+def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
+ [SDNPHasChain, SDNPSideEffect,
+ SDNPInGlue, SDNPOutGlue]>;
+def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def PPCeh_sjlj_setjmp : SDNode<"PPCISD::EH_SJLJ_SETJMP",
+ SDTypeProfile<1, 1, [SDTCisInt<0>,
+ SDTCisPtrTy<1>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
+ SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
+def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
+
+def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
+ [SDNPHasChain, SDNPOptInGlue]>;
+
+def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to set/unset CR bit 6 for SVR4 vararg calls
+def PPCcr6set : SDNode<"PPCISD::CR6SET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def PPCcr6unset : SDNode<"PPCISD::CR6UNSET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Instructions to support atomic operations
+def PPClarx : SDNode<"PPCISD::LARX", SDT_PPClarx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstcx : SDNode<"PPCISD::STCX", SDT_PPCstcx,
+ [SDNPHasChain, SDNPMayStore]>;
+
+// Instructions to support medium and large code model
+def PPCaddisTocHA : SDNode<"PPCISD::ADDIS_TOC_HA", SDTIntBinOp, []>;
+def PPCldTocL : SDNode<"PPCISD::LD_TOC_L", SDTIntBinOp, [SDNPMayLoad]>;
+def PPCaddiTocL : SDNode<"PPCISD::ADDI_TOC_L", SDTIntBinOp, []>;
+
+
+// Instructions to support dynamic alloca.
+def SDTDynOp : SDTypeProfile<1, 2, []>;
+def PPCdynalloc : SDNode<"PPCISD::DYNALLOC", SDTDynOp, [SDNPHasChain]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC specific transformation functions and pattern fragments.
+//
+
+def SHL32 : SDNodeXForm<imm, [{
+ // Transformation function: 31 - imm
+ return getI32Imm(31 - N->getZExtValue());
+}]>;
+
+def SRL32 : SDNodeXForm<imm, [{
+ // Transformation function: 32 - imm
+ return N->getZExtValue() ? getI32Imm(32 - N->getZExtValue()) : getI32Imm(0);
+}]>;
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: get the low 16 bits.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def HA16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ signed int Val = N->getZExtValue();
+ return getI32Imm((Val - (signed short)Val) >> 16);
+}]>;
+def MB : SDNodeXForm<imm, [{
+ // Transformation function: get the start bit of a mask
+ unsigned mb = 0, me;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(mb);
+}]>;
+
+def ME : SDNodeXForm<imm, [{
+ // Transformation function: get the end bit of a mask
+ unsigned mb, me = 0;
+ (void)isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ return getI32Imm(me);
+}]>;
+def maskimm32 : PatLeaf<(imm), [{
+ // maskImm predicate - True if immediate is a run of ones.
+ unsigned mb, me;
+ if (N->getValueType(0) == MVT::i32)
+ return isRunOfOnes((unsigned)N->getZExtValue(), mb, me);
+ else
+ return false;
+}]>;
+
+def imm32SExt16 : Operand<i32>, ImmLeaf<i32, [{
+ // imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit
+ // sign extended field. Used by instructions like 'addi'.
+ return (int32_t)Imm == (short)Imm;
+}]>;
+def imm64SExt16 : Operand<i64>, ImmLeaf<i64, [{
+ // imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit
+ // sign extended field. Used by instructions like 'addi'.
+ return (int64_t)Imm == (short)Imm;
+}]>;
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field. Used by instructions like 'ori'.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+// imm16Shifted* - These match immediates where the low 16-bits are zero. There
+// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
+// identical in 32-bit mode, but in 64-bit mode, they return true if the
+// immediate fits into a sign/zero extended 32-bit immediate (with the low bits
+// clear).
+def imm16ShiftedZExt : PatLeaf<(imm), [{
+ // imm16ShiftedZExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'xoris'.
+ return (N->getZExtValue() & ~uint64_t(0xFFFF0000)) == 0;
+}], HI16>;
+
+def imm16ShiftedSExt : PatLeaf<(imm), [{
+ // imm16ShiftedSExt predicate - True if only bits in the top 16-bits of the
+ // immediate are set. Used by instructions like 'addis'. Identical to
+ // imm16ShiftedZExt in 32-bit mode.
+ if (N->getZExtValue() & 0xFFFF) return false;
+ if (N->getValueType(0) == MVT::i32)
+ return true;
+ // For 64-bit, make sure it is sext right.
+ return N->getZExtValue() == (uint64_t)(int)N->getZExtValue();
+}], HI16>;
+
+def imm64ZExt32 : Operand<i64>, ImmLeaf<i64, [{
+ // imm64ZExt32 predicate - True if the i64 immediate fits in a 32-bit
+ // zero extended field.
+ return isUInt<32>(Imm);
+}]>;
+
+// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
+// restricted memrix (4-aligned) constants are alignment sensitive. If these
+// offsets are hidden behind TOC entries than the values of the lower-order
+// bits cannot be checked directly. As a result, we need to also incorporate
+// an alignment check into the relevant patterns.
+
+def aligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 4;
+}]>;
+def aligned4pre_store : PatFrag<
+ (ops node:$val, node:$base, node:$offset),
+ (pre_store node:$val, node:$base, node:$offset), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 4;
+}]>;
+
+def unaligned4load : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4store : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() < 4;
+}]>;
+def unaligned4sextloadi32 : PatFrag<(ops node:$ptr), (sextloadi32 node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() < 4;
+}]>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Flag Definitions.
+
+class isPPC64 { bit PPC64 = 1; }
+class isDOT { bit RC = 1; }
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Operand Definitions.
+
+// In the default PowerPC assembler syntax, registers are specified simply
+// by number, so they cannot be distinguished from immediate values (without
+// looking at the opcode). This means that the default operand matching logic
+// for the asm parser does not work, and we need to specify custom matchers.
+// Since those can only be specified with RegisterOperand classes and not
+// directly on the RegisterClass, all instructions patterns used by the asm
+// parser need to use a RegisterOperand (instead of a RegisterClass) for
+// all their register operands.
+// For this purpose, we define one RegisterOperand for each RegisterClass,
+// using the same name as the class, just in lower case.
+
+def PPCRegGPRCAsmOperand : AsmOperandClass {
+ let Name = "RegGPRC"; let PredicateMethod = "isRegNumber";
+}
+def gprc : RegisterOperand<GPRC> {
+ let ParserMatchClass = PPCRegGPRCAsmOperand;
+}
+def PPCRegG8RCAsmOperand : AsmOperandClass {
+ let Name = "RegG8RC"; let PredicateMethod = "isRegNumber";
+}
+def g8rc : RegisterOperand<G8RC> {
+ let ParserMatchClass = PPCRegG8RCAsmOperand;
+}
+def PPCRegGPRCNoR0AsmOperand : AsmOperandClass {
+ let Name = "RegGPRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def gprc_nor0 : RegisterOperand<GPRC_NOR0> {
+ let ParserMatchClass = PPCRegGPRCNoR0AsmOperand;
+}
+def PPCRegG8RCNoX0AsmOperand : AsmOperandClass {
+ let Name = "RegG8RCNoX0"; let PredicateMethod = "isRegNumber";
+}
+def g8rc_nox0 : RegisterOperand<G8RC_NOX0> {
+ let ParserMatchClass = PPCRegG8RCNoX0AsmOperand;
+}
+def PPCRegF8RCAsmOperand : AsmOperandClass {
+ let Name = "RegF8RC"; let PredicateMethod = "isRegNumber";
+}
+def f8rc : RegisterOperand<F8RC> {
+ let ParserMatchClass = PPCRegF8RCAsmOperand;
+}
+def PPCRegF4RCAsmOperand : AsmOperandClass {
+ let Name = "RegF4RC"; let PredicateMethod = "isRegNumber";
+}
+def f4rc : RegisterOperand<F4RC> {
+ let ParserMatchClass = PPCRegF4RCAsmOperand;
+}
+def PPCRegVRRCAsmOperand : AsmOperandClass {
+ let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
+}
+def vrrc : RegisterOperand<VRRC> {
+ let ParserMatchClass = PPCRegVRRCAsmOperand;
+}
+def PPCRegCRBITRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
+}
+def crbitrc : RegisterOperand<CRBITRC> {
+ let ParserMatchClass = PPCRegCRBITRCAsmOperand;
+}
+def PPCRegCRRCAsmOperand : AsmOperandClass {
+ let Name = "RegCRRC"; let PredicateMethod = "isCCRegNumber";
+}
+def crrc : RegisterOperand<CRRC> {
+ let ParserMatchClass = PPCRegCRRCAsmOperand;
+}
+
+def PPCU2ImmAsmOperand : AsmOperandClass {
+ let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u2imm : Operand<i32> {
+ let PrintMethod = "printU2ImmOperand";
+ let ParserMatchClass = PPCU2ImmAsmOperand;
+}
+def PPCS5ImmAsmOperand : AsmOperandClass {
+ let Name = "S5Imm"; let PredicateMethod = "isS5Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s5imm : Operand<i32> {
+ let PrintMethod = "printS5ImmOperand";
+ let ParserMatchClass = PPCS5ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<5>";
+}
+def PPCU5ImmAsmOperand : AsmOperandClass {
+ let Name = "U5Imm"; let PredicateMethod = "isU5Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u5imm : Operand<i32> {
+ let PrintMethod = "printU5ImmOperand";
+ let ParserMatchClass = PPCU5ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<5>";
+}
+def PPCU6ImmAsmOperand : AsmOperandClass {
+ let Name = "U6Imm"; let PredicateMethod = "isU6Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u6imm : Operand<i32> {
+ let PrintMethod = "printU6ImmOperand";
+ let ParserMatchClass = PPCU6ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<6>";
+}
+def PPCS16ImmAsmOperand : AsmOperandClass {
+ let Name = "S16Imm"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s16imm : Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS16ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+def PPCU16ImmAsmOperand : AsmOperandClass {
+ let Name = "U16Imm"; let PredicateMethod = "isU16Imm";
+ let RenderMethod = "addImmOperands";
+}
+def u16imm : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCU16ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<16>";
+}
+def PPCS17ImmAsmOperand : AsmOperandClass {
+ let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s17imm : Operand<i32> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+ let DecoderMethod = "decodeSImmOperand<16>";
+}
+def PPCDirectBrAsmOperand : AsmOperandClass {
+ let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
+ let RenderMethod = "addBranchTargetOperands";
+}
+def directbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def absdirectbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCondBrAsmOperand : AsmOperandClass {
+ let Name = "CondBr"; let PredicateMethod = "isCondBr";
+ let RenderMethod = "addBranchTargetOperands";
+}
+def condbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getCondBrEncoding";
+ let ParserMatchClass = PPCCondBrAsmOperand;
+}
+def abscondbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsCondBrEncoding";
+ let ParserMatchClass = PPCCondBrAsmOperand;
+}
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printBranchOperand";
+ let EncoderMethod = "getDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def abscalltarget : Operand<iPTR> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCRBitMaskOperand : AsmOperandClass {
+ let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
+}
+def crbitm: Operand<i8> {
+ let PrintMethod = "printcrbitm";
+ let EncoderMethod = "get_crbitm_encoding";
+ let DecoderMethod = "decodeCRBitMOperand";
+ let ParserMatchClass = PPCCRBitMaskOperand;
+}
+// Address operands
+// A version of ptr_rc which excludes R0 (or X0 in 64-bit mode).
+def PPCRegGxRCNoR0Operand : AsmOperandClass {
+ let Name = "RegGxRCNoR0"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_nor0 : Operand<iPTR>, PointerLikeRegClass<1> {
+ let ParserMatchClass = PPCRegGxRCNoR0Operand;
+}
+// A version of ptr_rc usable with the asm parser.
+def PPCRegGxRCOperand : AsmOperandClass {
+ let Name = "RegGxRC"; let PredicateMethod = "isRegNumber";
+}
+def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
+ let ParserMatchClass = PPCRegGxRCOperand;
+}
+
+def PPCDispRIOperand : AsmOperandClass {
+ let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addImmOperands";
+}
+def dispRI : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIOperand;
+}
+def PPCDispRIXOperand : AsmOperandClass {
+ let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+ let RenderMethod = "addImmOperands";
+}
+def dispRIX : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRIXOperand;
+}
+
+def memri : Operand<iPTR> {
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIEncoding";
+ let DecoderMethod = "decodeMemRIOperands";
+}
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
+}
+def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
+ let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
+ let EncoderMethod = "getMemRIXEncoding";
+ let DecoderMethod = "decodeMemRIXOperands";
+}
+
+// A single-register address. This is used with the SjLj
+// pseudo-instructions.
+def memr : Operand<iPTR> {
+ let MIOperandInfo = (ops ptr_rc:$ptrreg);
+}
+def PPCTLSRegOperand : AsmOperandClass {
+ let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
+ let RenderMethod = "addTLSRegOperands";
+}
+def tlsreg32 : Operand<i32> {
+ let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
+}
+def tlsgd32 : Operand<i32> {}
+def tlscall32 : Operand<i32> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd32:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
+
+// PowerPC Predicate operand.
+def pred : Operand<OtherVT> {
+ let PrintMethod = "printPredicateOperand";
+ let MIOperandInfo = (ops i32imm:$bibo, crrc:$reg);
+}
+
+// Define PowerPC specific addressing mode.
+def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
+def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
+def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
+
+// The address in a single register. This is used with the SjLj
+// pseudo-instructions.
+def addr : ComplexPattern<iPTR, 1, "SelectAddr",[], []>;
+
+/// This is just the offset part of iaddr, used for preinc.
+def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Predicate Definitions.
+def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
+def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
+def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Multiclass Definitions.
+
+multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_6rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XForm_6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_10rc<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XForm_10<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_11r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_11<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XForm_11<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_1r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_1rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XOForm_1<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_3r<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XOForm_3<opcode, xo, oe, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MForm_2<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MForm_2<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MDForm_1r<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MDForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MDForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass MDSForm_1r<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MDSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def o : MDSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XSForm_1rc<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ let Defs = [CARRY] in
+ def NAME : XSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CARRY, CR0] in
+ def o : XSForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_26<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_26<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_1<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_2r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : AForm_3<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : AForm_3<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Definitions.
+
+// Pseudo-instructions:
+
+let hasCtrlDep = 1 in {
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm:$amt), "#ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm:$amt1, u16imm:$amt2), "#ADJCALLSTACKUP $amt1 $amt2",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+def UPDATE_VRSAVE : Pseudo<(outs gprc:$rD), (ins gprc:$rS),
+ "UPDATE_VRSAVE $rD, $rS", []>;
+}
+
+let Defs = [R1], Uses = [R1] in
+def DYNALLOC : Pseudo<(outs gprc:$result), (ins gprc:$negsize, memri:$fpsi), "#DYNALLOC",
+ [(set i32:$result,
+ (PPCdynalloc i32:$negsize, iaddr:$fpsi))]>;
+
+// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
+// instruction selection into a branch sequence.
+let usesCustomInserter = 1, // Expanded after instruction selection.
+ PPC970_Single = 1 in {
+ // Note that SELECT_CC_I4 and SELECT_CC_I8 use the no-r0 register classes
+ // because either operand might become the first operand in an isel, and
+ // that operand cannot be r0.
+ def SELECT_CC_I4 : Pseudo<(outs gprc:$dst), (ins crrc:$cond,
+ gprc_nor0:$T, gprc_nor0:$F,
+ i32imm:$BROPC), "#SELECT_CC_I4",
+ []>;
+ def SELECT_CC_I8 : Pseudo<(outs g8rc:$dst), (ins crrc:$cond,
+ g8rc_nox0:$T, g8rc_nox0:$F,
+ i32imm:$BROPC), "#SELECT_CC_I8",
+ []>;
+ def SELECT_CC_F4 : Pseudo<(outs f4rc:$dst), (ins crrc:$cond, f4rc:$T, f4rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_F4",
+ []>;
+ def SELECT_CC_F8 : Pseudo<(outs f8rc:$dst), (ins crrc:$cond, f8rc:$T, f8rc:$F,
+ i32imm:$BROPC), "#SELECT_CC_F8",
+ []>;
+ def SELECT_CC_VRRC: Pseudo<(outs vrrc:$dst), (ins crrc:$cond, vrrc:$T, vrrc:$F,
+ i32imm:$BROPC), "#SELECT_CC_VRRC",
+ []>;
+
+ // SELECT_* pseudo instructions, like SELECT_CC_* but taking condition
+ // register bit directly.
+ def SELECT_I4 : Pseudo<(outs gprc:$dst), (ins crbitrc:$cond,
+ gprc_nor0:$T, gprc_nor0:$F), "#SELECT_I4",
+ [(set i32:$dst, (select i1:$cond, i32:$T, i32:$F))]>;
+ def SELECT_I8 : Pseudo<(outs g8rc:$dst), (ins crbitrc:$cond,
+ g8rc_nox0:$T, g8rc_nox0:$F), "#SELECT_I8",
+ [(set i64:$dst, (select i1:$cond, i64:$T, i64:$F))]>;
+ def SELECT_F4 : Pseudo<(outs f4rc:$dst), (ins crbitrc:$cond,
+ f4rc:$T, f4rc:$F), "#SELECT_F4",
+ [(set f32:$dst, (select i1:$cond, f32:$T, f32:$F))]>;
+ def SELECT_F8 : Pseudo<(outs f8rc:$dst), (ins crbitrc:$cond,
+ f8rc:$T, f8rc:$F), "#SELECT_F8",
+ [(set f64:$dst, (select i1:$cond, f64:$T, f64:$F))]>;
+ def SELECT_VRRC: Pseudo<(outs vrrc:$dst), (ins crbitrc:$cond,
+ vrrc:$T, vrrc:$F), "#SELECT_VRRC",
+ [(set v4i32:$dst,
+ (select i1:$cond, v4i32:$T, v4i32:$F))]>;
+}
+
+// SPILL_CR - Indicate that we're dumping the CR register, so we'll need to
+// scavenge a register for it.
+let mayStore = 1 in {
+def SPILL_CR : Pseudo<(outs), (ins crrc:$cond, memri:$F),
+ "#SPILL_CR", []>;
+def SPILL_CRBIT : Pseudo<(outs), (ins crbitrc:$cond, memri:$F),
+ "#SPILL_CRBIT", []>;
+}
+
+// RESTORE_CR - Indicate that we're restoring the CR register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in {
+def RESTORE_CR : Pseudo<(outs crrc:$cond), (ins memri:$F),
+ "#RESTORE_CR", []>;
+def RESTORE_CRBIT : Pseudo<(outs crbitrc:$cond), (ins memri:$F),
+ "#RESTORE_CRBIT", []>;
+}
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
+ let isReturn = 1, Uses = [LR, RM] in
+ def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
+ [(retflag)]>;
+ let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
+ def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
+ "bcctr 4, $bi, 0", IIC_BrB, []>;
+ }
+ }
+}
+
+let Defs = [LR] in
+ def MovePCtoLR : Pseudo<(outs), (ins), "#MovePCtoLR", []>,
+ PPC970_Unit_BRU;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
+ let isBarrier = 1 in {
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
+ "b $dst", IIC_BrB,
+ [(br bb:$dst)]>;
+ def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
+ "ba $dst", IIC_BrB, []>;
+ }
+
+ // BCC represents an arbitrary conditional branch on a predicate.
+ // FIXME: should be able to write a pattern for PPCcondbranch, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ let isCodeGenOnly = 1 in {
+ def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
+ /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
+ "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
+
+ let isReturn = 1, Uses = [LR, RM] in
+ def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+ "b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>;
+ }
+
+ let isCodeGenOnly = 1 in {
+ let Pattern = [(brcond i1:$bi, bb:$dst)] in
+ def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 12, $bi, $dst">;
+
+ let Pattern = [(brcond (not i1:$bi), bb:$dst)] in
+ def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
+ "bc 4, $bi, $dst">;
+
+ let isReturn = 1, Uses = [LR, RM] in
+ def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi),
+ "bclr 12, $bi, 0", IIC_BrB, []>;
+ def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi),
+ "bclr 4, $bi, 0", IIC_BrB, []>;
+ }
+
+ let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ "bdzlr", IIC_BrB, []>;
+ def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ "bdnzlr", IIC_BrB, []>;
+ def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins),
+ "bdzlr+", IIC_BrB, []>;
+ def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins),
+ "bdnzlr+", IIC_BrB, []>;
+ def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins),
+ "bdzlr-", IIC_BrB, []>;
+ def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins),
+ "bdnzlr-", IIC_BrB, []>;
+ }
+
+ let Defs = [CTR], Uses = [CTR] in {
+ def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz $dst">;
+ def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz $dst">;
+ def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza $dst">;
+ def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza $dst">;
+ def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz+ $dst">;
+ def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz+ $dst">;
+ def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza+ $dst">;
+ def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza+ $dst">;
+ def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz- $dst">;
+ def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz- $dst">;
+ def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza- $dst">;
+ def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza- $dst">;
+ }
+}
+
+// The unconditional BCL used by the SjLj setjmp code.
+let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7 in {
+ let Defs = [LR], Uses = [RM] in {
+ def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bcl 20, 31, $dst">;
+ }
+}
+
+let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
+ // Convenient aliases for call instructions
+ let Uses = [RM] in {
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
+ "bl $func", IIC_BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
+ "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
+
+ let isCodeGenOnly = 1 in {
+ def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
+ "bl $func", IIC_BrB, []>;
+ def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
+ def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
+ "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">;
+
+ def BCL : BForm_4<16, 12, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 12, $bi, $dst">;
+ def BCLn : BForm_4<16, 4, 0, 1, (outs),
+ (ins crbitrc:$bi, condbrtarget:$dst),
+ "bcl 4, $bi, $dst">;
+ }
+ }
+ let Uses = [CTR, RM] in {
+ def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
+ "bctrl", IIC_BrB, [(PPCbctrl)]>,
+ Requires<[In32BitMode]>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 12, $bi, 0", IIC_BrB, []>;
+ def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
+ "bcctrl 4, $bi, 0", IIC_BrB, []>;
+ }
+ }
+ let Uses = [LR, RM] in {
+ def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins),
+ "blrl", IIC_BrB, []>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB,
+ []>;
+
+ def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 12, $bi, 0", IIC_BrB, []>;
+ def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi),
+ "bclrl 4, $bi, 0", IIC_BrB, []>;
+ }
+ }
+ let Defs = [CTR], Uses = [CTR, RM] in {
+ def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl $dst">;
+ def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl $dst">;
+ def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla $dst">;
+ def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla $dst">;
+ def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl+ $dst">;
+ def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl+ $dst">;
+ def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla+ $dst">;
+ def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla+ $dst">;
+ def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl- $dst">;
+ def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl- $dst">;
+ def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla- $dst">;
+ def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla- $dst">;
+ }
+ let Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins),
+ "bdzlrl", IIC_BrB, []>;
+ def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins),
+ "bdnzlrl", IIC_BrB, []>;
+ def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins),
+ "bdzlrl+", IIC_BrB, []>;
+ def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins),
+ "bdnzlrl+", IIC_BrB, []>;
+ def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins),
+ "bdzlrl-", IIC_BrB, []>;
+ def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins),
+ "bdnzlrl-", IIC_BrB, []>;
+ }
+}
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNdi :Pseudo< (outs),
+ (ins calltarget:$dst, i32imm:$offset),
+ "#TC_RETURNd $dst $offset",
+ []>;
+
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
+ "#TC_RETURNa $func $offset",
+ [(PPCtc_return (i32 imm:$func), imm:$offset)]>;
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
+def TCRETURNri : Pseudo<(outs), (ins CTRRC:$dst, i32imm:$offset),
+ "#TC_RETURNr $dst $offset",
+ []>;
+
+
+let isCodeGenOnly = 1 in {
+
+let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
+ isIndirectBranch = 1, isCall = 1, isReturn = 1, Uses = [CTR, RM] in
+def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
+ []>, Requires<[In32BitMode]>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
+ "b $dst", IIC_BrB,
+ []>;
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
+ isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
+def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
+ "ba $dst", IIC_BrB,
+ []>;
+
+}
+
+let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ let Defs = [CTR] in
+ def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
+ "#EH_SJLJ_SETJMP32",
+ [(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
+ Requires<[In32BitMode]>;
+ let isTerminator = 1 in
+ def EH_SjLj_LongJmp32 : Pseudo<(outs), (ins memr:$buf),
+ "#EH_SJLJ_LONGJMP32",
+ [(PPCeh_sjlj_longjmp addr:$buf)]>,
+ Requires<[In32BitMode]>;
+}
+
+let isBranch = 1, isTerminator = 1 in {
+ def EH_SjLj_Setup : Pseudo<(outs), (ins directbrtarget:$dst),
+ "#EH_SjLj_Setup\t$dst", []>;
+}
+
+// System call.
+let PPC970_Unit = 7 in {
+ def SC : SCForm<17, 1, (outs), (ins i32imm:$lev),
+ "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>;
+}
+
+// DCB* instructions.
+def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
+ IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBT : DCB_Form<278, 0, (outs), (ins memrr:$dst), "dcbt $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbt xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBTST : DCB_Form<246, 0, (outs), (ins memrr:$dst), "dcbtst $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbtst xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
+ IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+ PPC970_DGroup_Single;
+
+def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
+ (DCBT xoaddr:$dst)>;
+
+// Atomic operations
+let usesCustomInserter = 1 in {
+ let Defs = [CR0] in {
+ def ATOMIC_LOAD_ADD_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I8",
+ [(set i32:$dst, (atomic_load_add_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I8",
+ [(set i32:$dst, (atomic_load_sub_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I8",
+ [(set i32:$dst, (atomic_load_and_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I8",
+ [(set i32:$dst, (atomic_load_or_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "ATOMIC_LOAD_XOR_I8",
+ [(set i32:$dst, (atomic_load_xor_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I8",
+ [(set i32:$dst, (atomic_load_nand_8 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I16",
+ [(set i32:$dst, (atomic_load_add_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I16",
+ [(set i32:$dst, (atomic_load_sub_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I16",
+ [(set i32:$dst, (atomic_load_and_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I16",
+ [(set i32:$dst, (atomic_load_or_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I16",
+ [(set i32:$dst, (atomic_load_xor_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I16",
+ [(set i32:$dst, (atomic_load_nand_16 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_ADD_I32",
+ [(set i32:$dst, (atomic_load_add_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_SUB_I32",
+ [(set i32:$dst, (atomic_load_sub_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_AND_I32",
+ [(set i32:$dst, (atomic_load_and_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_OR_I32",
+ [(set i32:$dst, (atomic_load_or_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_XOR_I32",
+ [(set i32:$dst, (atomic_load_xor_32 xoaddr:$ptr, i32:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$incr), "#ATOMIC_LOAD_NAND_I32",
+ [(set i32:$dst, (atomic_load_nand_32 xoaddr:$ptr, i32:$incr))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I8",
+ [(set i32:$dst, (atomic_cmp_swap_8 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I16 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_16 xoaddr:$ptr, i32:$old, i32:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$old, gprc:$new), "#ATOMIC_CMP_SWAP_I32 $dst $ptr $old $new",
+ [(set i32:$dst, (atomic_cmp_swap_32 xoaddr:$ptr, i32:$old, i32:$new))]>;
+
+ def ATOMIC_SWAP_I8 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_i8",
+ [(set i32:$dst, (atomic_swap_8 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I16 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I16",
+ [(set i32:$dst, (atomic_swap_16 xoaddr:$ptr, i32:$new))]>;
+ def ATOMIC_SWAP_I32 : Pseudo<
+ (outs gprc:$dst), (ins memrr:$ptr, gprc:$new), "#ATOMIC_SWAP_I32",
+ [(set i32:$dst, (atomic_swap_32 xoaddr:$ptr, i32:$new))]>;
+ }
+}
+
+// Instructions to support atomic operations
+def LWARX : XForm_1<31, 20, (outs gprc:$rD), (ins memrr:$src),
+ "lwarx $rD, $src", IIC_LdStLWARX,
+ [(set i32:$rD, (PPClarx xoaddr:$src))]>;
+
+let Defs = [CR0] in
+def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
+ "stwcx. $rS, $dst", IIC_LdStSTWCX,
+ [(PPCstcx i32:$rS, xoaddr:$dst)]>,
+ isDOT;
+
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
+
+def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm),
+ "twi $to, $rA, $imm", IIC_IntTrapW, []>;
+def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB),
+ "tw $to, $rA, $rB", IIC_IntTrapW, []>;
+def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm),
+ "tdi $to, $rA, $imm", IIC_IntTrapD, []>;
+def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
+ "td $to, $rA, $rB", IIC_IntTrapD, []>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Load Instructions.
+//
+
+// Unindexed (r+i) Loads.
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
+ "lbz $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi8 iaddr:$src))]>;
+def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
+ "lha $rD, $src", IIC_LdStLHA,
+ [(set i32:$rD, (sextloadi16 iaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
+ "lhz $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi16 iaddr:$src))]>;
+def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
+ "lwz $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (load iaddr:$src))]>;
+
+def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
+ "lfs $rD, $src", IIC_LdStLFD,
+ [(set f32:$rD, (load iaddr:$src))]>;
+def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
+ "lfd $rD, $src", IIC_LdStLFD,
+ [(set f64:$rD, (load iaddr:$src))]>;
+
+
+// Unindexed (r+i) Loads with Update (preinc).
+let mayLoad = 1, neverHasSideEffects = 1 in {
+def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lbzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhau $rD, $addr", IIC_LdStLHAU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lhzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lwzu $rD, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lfsu $rD, $addr", IIC_LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
+ "lfdu $rD, $addr", IIC_LdStLFDU,
+ []>, RegConstraint<"$addr.reg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+
+// Indexed (r+r) Loads with Update (preinc).
+def LBZUX : XForm_1<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lbzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHAUX : XForm_1<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhaux $rD, $addr", IIC_LdStLHAUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LHZUX : XForm_1<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lhzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LWZUX : XForm_1<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lwzux $rD, $addr", IIC_LdStLoadUpdX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFSUX : XForm_1<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lfsux $rD, $addr", IIC_LdStLFDUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+
+def LFDUX : XForm_1<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
+ (ins memrr:$addr),
+ "lfdux $rD, $addr", IIC_LdStLFDUX,
+ []>, RegConstraint<"$addr.ptrreg = $ea_result">,
+ NoEncode<"$ea_result">;
+}
+}
+
+// Indexed (r+r) Loads.
+//
+let canFoldAsLoad = 1, PPC970_Unit = 2 in {
+def LBZX : XForm_1<31, 87, (outs gprc:$rD), (ins memrr:$src),
+ "lbzx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi8 xaddr:$src))]>;
+def LHAX : XForm_1<31, 343, (outs gprc:$rD), (ins memrr:$src),
+ "lhax $rD, $src", IIC_LdStLHA,
+ [(set i32:$rD, (sextloadi16 xaddr:$src))]>,
+ PPC970_DGroup_Cracked;
+def LHZX : XForm_1<31, 279, (outs gprc:$rD), (ins memrr:$src),
+ "lhzx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (zextloadi16 xaddr:$src))]>;
+def LWZX : XForm_1<31, 23, (outs gprc:$rD), (ins memrr:$src),
+ "lwzx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (load xaddr:$src))]>;
+
+
+def LHBRX : XForm_1<31, 790, (outs gprc:$rD), (ins memrr:$src),
+ "lhbrx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i16))]>;
+def LWBRX : XForm_1<31, 534, (outs gprc:$rD), (ins memrr:$src),
+ "lwbrx $rD, $src", IIC_LdStLoad,
+ [(set i32:$rD, (PPClbrx xoaddr:$src, i32))]>;
+
+def LFSX : XForm_25<31, 535, (outs f4rc:$frD), (ins memrr:$src),
+ "lfsx $frD, $src", IIC_LdStLFD,
+ [(set f32:$frD, (load xaddr:$src))]>;
+def LFDX : XForm_25<31, 599, (outs f8rc:$frD), (ins memrr:$src),
+ "lfdx $frD, $src", IIC_LdStLFD,
+ [(set f64:$frD, (load xaddr:$src))]>;
+
+def LFIWAX : XForm_25<31, 855, (outs f8rc:$frD), (ins memrr:$src),
+ "lfiwax $frD, $src", IIC_LdStLFD,
+ [(set f64:$frD, (PPClfiwax xoaddr:$src))]>;
+def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
+ "lfiwzx $frD, $src", IIC_LdStLFD,
+ [(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
+}
+
+// Load Multiple
+def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
+ "lmw $rD, $src", IIC_LdStLMW, []>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Store Instructions.
+//
+
+// Unindexed (r+i) Stores.
+let PPC970_Unit = 2 in {
+def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$src),
+ "stb $rS, $src", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, iaddr:$src)]>;
+def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$src),
+ "sth $rS, $src", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, iaddr:$src)]>;
+def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$src),
+ "stw $rS, $src", IIC_LdStStore,
+ [(store i32:$rS, iaddr:$src)]>;
+def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
+ "stfs $rS, $dst", IIC_LdStSTFD,
+ [(store f32:$rS, iaddr:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
+ "stfd $rS, $dst", IIC_LdStSTFD,
+ [(store f64:$rS, iaddr:$dst)]>;
+}
+
+// Unindexed (r+i) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
+ "stbu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
+ "sthu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
+ "stwu $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
+ "stfsu $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
+ "stfdu $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STBU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STHU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STWU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFSU $rS, iaddroff:$ptroff, $ptrreg)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
+ (STFDU $rS, iaddroff:$ptroff, $ptrreg)>;
+
+// Indexed (r+r) Stores.
+let PPC970_Unit = 2 in {
+def STBX : XForm_8<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
+ "stbx $rS, $dst", IIC_LdStStore,
+ [(truncstorei8 i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STHX : XForm_8<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthx $rS, $dst", IIC_LdStStore,
+ [(truncstorei16 i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+def STWX : XForm_8<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
+ "stwx $rS, $dst", IIC_LdStStore,
+ [(store i32:$rS, xaddr:$dst)]>,
+ PPC970_DGroup_Cracked;
+
+def STHBRX: XForm_8<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
+ "sthbrx $rS, $dst", IIC_LdStStore,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i16)]>,
+ PPC970_DGroup_Cracked;
+def STWBRX: XForm_8<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
+ "stwbrx $rS, $dst", IIC_LdStStore,
+ [(PPCstbrx i32:$rS, xoaddr:$dst, i32)]>,
+ PPC970_DGroup_Cracked;
+
+def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
+ "stfiwx $frS, $dst", IIC_LdStSTFD,
+ [(PPCstfiwx f64:$frS, xoaddr:$dst)]>;
+
+def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
+ "stfsx $frS, $dst", IIC_LdStSTFD,
+ [(store f32:$frS, xaddr:$dst)]>;
+def STFDX : XForm_28<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
+ "stfdx $frS, $dst", IIC_LdStSTFD,
+ [(store f64:$frS, xaddr:$dst)]>;
+}
+
+// Indexed (r+r) Stores with Update (preinc).
+let PPC970_Unit = 2, mayStore = 1 in {
+def STBUX : XForm_8<31, 247, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
+ "stbux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STHUX : XForm_8<31, 439, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
+ "sthux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STWUX : XForm_8<31, 183, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memrr:$dst),
+ "stwux $rS, $dst", IIC_LdStStoreUpd, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFSUX: XForm_8<31, 695, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memrr:$dst),
+ "stfsux $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+def STFDUX: XForm_8<31, 759, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memrr:$dst),
+ "stfdux $rS, $dst", IIC_LdStSTFDU, []>,
+ RegConstraint<"$dst.ptrreg = $ea_res">, NoEncode<"$ea_res">,
+ PPC970_DGroup_Cracked;
+}
+
+// Patterns to match the pre-inc stores. We can't put the patterns on
+// the instruction definitions directly as ISel wants the address base
+// and offset to be separate operands, not a single complex operand.
+def : Pat<(pre_truncsti8 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STBUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_truncsti16 i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STHUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store i32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STWUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFSUX $rS, $ptrreg, $ptroff)>;
+def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
+ (STFDUX $rS, $ptrreg, $ptroff)>;
+
+// Store Multiple
+def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
+ "stmw $rS, $dst", IIC_LdStLMW, []>;
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
+ "sync $L", IIC_LdStSync, []>, Requires<[IsNotBookE]>;
+
+let isCodeGenOnly = 1 in {
+ def MSYNC : XForm_24_sync<31, 598, (outs), (ins),
+ "msync", IIC_LdStSync, []>, Requires<[IsBookE]> {
+ let L = 0;
+ }
+}
+
+def : Pat<(int_ppc_sync), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(int_ppc_sync), (MSYNC)>, Requires<[IsBookE]>;
+
+//===----------------------------------------------------------------------===//
+// PPC32 Arithmetic Instructions.
+//
+
+let PPC970_Unit = 1 in { // FXU Operations.
+def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
+ "addi $rD, $rA, $imm", IIC_IntSimple,
+ [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
+let BaseName = "addic" in {
+let Defs = [CARRY] in
+def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "addic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
+ RecFormRel, PPC970_DGroup_Cracked;
+let Defs = [CARRY, CR0] in
+def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "addic. $rD, $rA, $imm", IIC_IntGeneral,
+ []>, isDOT, RecFormRel;
+}
+def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
+ "addis $rD, $rA, $imm", IIC_IntSimple,
+ [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+let isCodeGenOnly = 1 in
+def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
+ "la $rD, $sym($rA)", IIC_IntGeneral,
+ [(set i32:$rD, (add i32:$rA,
+ (PPClo tglobaladdr:$sym, 0)))]>;
+def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "mulli $rD, $rA, $imm", IIC_IntMulLI,
+ [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
+let Defs = [CARRY] in
+def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
+ "subfic $rD, $rA, $imm", IIC_IntGeneral,
+ [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+ def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
+ "li $rD, $imm", IIC_IntSimple,
+ [(set i32:$rD, imm32SExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm),
+ "lis $rD, $imm", IIC_IntSimple,
+ [(set i32:$rD, imm16ShiftedSExt:$imm)]>;
+}
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let Defs = [CR0] in {
+def ANDIo : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "andi. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
+ isDOT;
+def ANDISo : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "andis. $dst, $src1, $src2", IIC_IntGeneral,
+ [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
+ isDOT;
+}
+def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "ori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
+def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "oris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
+def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "xori $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
+def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "xoris $dst, $src1, $src2", IIC_IntSimple,
+ [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
+
+def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple,
+ []>;
+let isCodeGenOnly = 1 in {
+// The POWER6 and POWER7 have special group-terminating nops.
+def NOP_GT_PWR6 : DForm_4_fixedreg_zero<24, 1, (outs), (ins),
+ "ori 1, 1, 0", IIC_IntSimple, []>;
+def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
+ "ori 2, 2, 0", IIC_IntSimple, []>;
+}
+
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
+ "cmpwi $crD, $rA, $imm", IIC_IntCompare>;
+ def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
+ "cmplwi $dst, $src1, $src2", IIC_IntCompare>;
+}
+}
+
+let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
+let isCommutable = 1 in {
+defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "nand", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
+defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "and", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+} // isCommutable
+defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "andc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
+defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "or", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
+defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "nor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+} // isCommutable
+defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "orc", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+let isCommutable = 1 in {
+defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "eqv", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
+defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "xor", "$rA, $rS, $rB", IIC_IntSimple,
+ [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+} // isCommutable
+defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "slw", "$rA, $rS, $rB", IIC_IntGeneral,
+ [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>;
+defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "srw", "$rA, $rS, $rB", IIC_IntGeneral,
+ [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>;
+defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
+ "sraw", "$rA, $rS, $rB", IIC_IntShift,
+ [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+let neverHasSideEffects = 1 in {
+defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
+ "srawi", "$rA, $rS, $SH", IIC_IntShift,
+ [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>;
+defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS),
+ "cntlzw", "$rA, $rS", IIC_IntGeneral,
+ [(set i32:$rA, (ctlz i32:$rS))]>;
+defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
+ "extsb", "$rA, $rS", IIC_IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i8))]>;
+defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
+ "extsh", "$rA, $rS", IIC_IntSimple,
+ [(set i32:$rA, (sext_inreg i32:$rS, i16))]>;
+}
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+ "cmpw $crD, $rA, $rB", IIC_IntCompare>;
+ def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
+ "cmplw $crD, $rA, $rB", IIC_IntCompare>;
+}
+}
+let PPC970_Unit = 3 in { // FPU Operations.
+//def FCMPO : XForm_17<63, 32, (outs CRRC:$crD), (ins FPRC:$fA, FPRC:$fB),
+// "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+let isCompare = 1, neverHasSideEffects = 1 in {
+ def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
+ "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
+}
+
+let Uses = [RM] in {
+ let neverHasSideEffects = 1 in {
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiw", "$frD, $frB", IIC_FPGeneral,
+ []>;
+ defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiwz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfctiwz f64:$frB))]>;
+
+ defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
+ "frsp", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fround f64:$frB))]>;
+
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frin", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (frnd f64:$frB))]>;
+ defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frin", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (frnd f32:$frB))]>;
+ }
+
+ let neverHasSideEffects = 1 in {
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frip", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fceil f64:$frB))]>;
+ defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frip", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fceil f32:$frB))]>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
+ "friz", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (ftrunc f64:$frB))]>;
+ defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
+ "friz", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (ftrunc f32:$frB))]>;
+ let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+ defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frim", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (ffloor f64:$frB))]>;
+ defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frim", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (ffloor f32:$frB))]>;
+
+ defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fsqrt", "$frD, $frB", IIC_FPSqrtD,
+ [(set f64:$frD, (fsqrt f64:$frB))]>;
+ defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fsqrts", "$frD, $frB", IIC_FPSqrtS,
+ [(set f32:$frD, (fsqrt f32:$frB))]>;
+ }
+ }
+}
+
+/// Note that FMR is defined as pseudo-ops on the PPC970 because they are
+/// often coalesced away and we don't want the dispatch group builder to think
+/// that they will fill slots (which could cause the load of a LSU reject to
+/// sneak into a d-group with a store).
+let neverHasSideEffects = 1 in
+defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fmr", "$frD, $frB", IIC_FPGeneral,
+ []>, // (set f32:$frD, f32:$frB)
+ PPC970_Unit_Pseudo;
+
+let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
+// These are artificially split into two different forms, for 4/8 byte FP.
+defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fabs f32:$frB))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fabs f64:$frB))]>;
+defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fnabs", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
+defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fneg", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fneg f32:$frB))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fneg", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fneg f64:$frB))]>;
+
+defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
+
+// Reciprocal estimates.
+defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fre", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfre f64:$frB))]>;
+defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
+ "fres", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfre f32:$frB))]>;
+defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
+ "frsqrte", "$frD, $frB", IIC_FPGeneral,
+ [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
+defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
+ "frsqrtes", "$frD, $frB", IIC_FPGeneral,
+ [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
+}
+
+// XL-Form instructions. condition register logical ops.
+//
+let neverHasSideEffects = 1 in
+def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
+ "mcrf $BF, $BFA", IIC_BrMCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+let isCommutable = 1 in {
+def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, i1:$CRB))]>;
+
+def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crnand $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (and i1:$CRA, i1:$CRB)))]>;
+
+def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "cror $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, i1:$CRB))]>;
+
+def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crxor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (xor i1:$CRA, i1:$CRB))]>;
+
+def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crnor $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (or i1:$CRA, i1:$CRB)))]>;
+
+def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "creqv $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (not (xor i1:$CRA, i1:$CRB)))]>;
+} // isCommutable
+
+def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crandc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (and i1:$CRA, (not i1:$CRB)))]>;
+
+def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crorc $CRD, $CRA, $CRB", IIC_BrCR,
+ [(set i1:$CRD, (or i1:$CRA, (not i1:$CRB)))]>;
+
+let isCodeGenOnly = 1 in {
+def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
+ "creqv $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 1)]>;
+
+def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
+ "crxor $dst, $dst, $dst", IIC_BrCR,
+ [(set i1:$dst, 0)]>;
+
+let Defs = [CR1EQ], CRD = 6 in {
+def CR6SET : XLForm_1_ext<19, 289, (outs), (ins),
+ "creqv 6, 6, 6", IIC_BrCR,
+ [(PPCcr6set)]>;
+
+def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
+ "crxor 6, 6, 6", IIC_BrCR,
+ [(PPCcr6unset)]>;
+}
+}
+
+// XFX-Form instructions. Instructions that deal with SPRs.
+//
+
+def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", IIC_SprMFSPR>;
+def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
+ "mtspr $SPR, $RT", IIC_SprMTSPR>;
+
+def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mftb $RT, $SPR", IIC_SprMFTB>, Deprecated<DeprecatedMFTB>;
+
+let Uses = [CTR] in {
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
+ "mfctr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
+def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
+let Pattern = [(int_ppc_mtctr i32:$rS)] in
+def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
+ "mtctr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let Defs = [LR] in {
+def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
+ "mtlr $rS", IIC_SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+let Uses = [LR] in {
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
+ "mflr $rT", IIC_SprMFSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+let isCodeGenOnly = 1 in {
+ // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed
+ // like a GPR on the PPC970. As such, copies in and out have the same
+ // performance characteristics as an OR instruction.
+ def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
+ "mtspr 256, $rS", IIC_IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
+ "mfspr $rT, 256", IIC_IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
+ def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
+ (outs VRSAVERC:$reg), (ins gprc:$rS),
+ "mtspr 256, $rS", IIC_IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
+ (ins VRSAVERC:$reg),
+ "mfspr $rT, 256", IIC_IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
+
+// SPILL_VRSAVE - Indicate that we're dumping the VRSAVE register,
+// so we'll need to scavenge a register for it.
+let mayStore = 1 in
+def SPILL_VRSAVE : Pseudo<(outs), (ins VRSAVERC:$vrsave, memri:$F),
+ "#SPILL_VRSAVE", []>;
+
+// RESTORE_VRSAVE - Indicate that we're restoring the VRSAVE register (previously
+// spilled), so we'll need to scavenge a register for it.
+let mayLoad = 1 in
+def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
+ "#RESTORE_VRSAVE", []>;
+
+let neverHasSideEffects = 1 in {
+def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
+ "mtocrf $FXM, $ST", IIC_BrMCRX>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
+ "mtcrf $FXM, $rS", IIC_BrMCRX>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
+ "mfocrf $rT, $FXM", IIC_SprMFCRF>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
+ "mfcr $rT", IIC_SprMFCR>,
+ PPC970_MicroCode, PPC970_Unit_CRU;
+} // neverHasSideEffects = 1
+
+// Pseudo instruction to perform FADD in round-to-zero mode.
+let usesCustomInserter = 1, Uses = [RM] in {
+ def FADDrtz: Pseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
+ [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+}
+
+// The above pseudo gets expanded to make use of the following instructions
+// to manipulate FPSCR. Note that FPSCR is not modeled at the DAG level.
+let Uses = [RM], Defs = [RM] in {
+ def MTFSB0 : XForm_43<63, 70, (outs), (ins u5imm:$FM),
+ "mtfsb0 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
+ "mtfsb1 $FM", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+ def MTFSF : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
+ "mtfsf $FM, $rT", IIC_IntMTFSB0, []>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+let Uses = [RM] in {
+ def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
+ "mffs $rT", IIC_IntMFFS,
+ [(set f64:$rT, (PPCmffs))]>,
+ PPC970_DGroup_Single, PPC970_Unit_FPU;
+}
+
+
+let PPC970_Unit = 1, neverHasSideEffects = 1 in { // FXU Operations.
+// XO-Form instructions. Arithmetic instructions that can set overflow bit
+let isCommutable = 1 in
+defm ADD4 : XOForm_1r<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "add", "$rT, $rA, $rB", IIC_IntSimple,
+ [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+let isCodeGenOnly = 1 in
+def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB),
+ "add $rT, $rA, $rB", IIC_IntSimple,
+ [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>;
+let isCommutable = 1 in
+defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "addc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_Cracked;
+
+defm DIVW : XOForm_1r<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divw", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+defm DIVWU : XOForm_1r<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "divwu", "$rT, $rA, $rB", IIC_IntDivW,
+ [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>,
+ PPC970_DGroup_First, PPC970_DGroup_Cracked;
+let isCommutable = 1 in {
+defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
+ [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
+defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
+ [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
+defm MULLW : XOForm_1r<31, 235, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "mullw", "$rT, $rA, $rB", IIC_IntMulHW,
+ [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+} // isCommutable
+defm SUBF : XOForm_1r<31, 40, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subf", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
+defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
+ PPC970_DGroup_Cracked;
+defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "neg", "$rT, $rA", IIC_IntSimple,
+ [(set i32:$rT, (ineg i32:$rA))]>;
+let Uses = [CARRY] in {
+let isCommutable = 1 in
+defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "adde", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
+defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "addme", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, -1))]>;
+defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "addze", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (adde i32:$rA, 0))]>;
+defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
+ "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
+ [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
+defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfme", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (sube -1, i32:$rA))]>;
+defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
+ "subfze", "$rT, $rA", IIC_IntGeneral,
+ [(set i32:$rT, (sube 0, i32:$rA))]>;
+}
+}
+
+// A-Form instructions. Most of the instructions executed in the FPU are of
+// this type.
+//
+let PPC970_Unit = 3, neverHasSideEffects = 1 in { // FPU Operations.
+let Uses = [RM] in {
+let isCommutable = 1 in {
+ defm FMADD : AForm_1r<63, 29,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT, (fma f64:$FRA, f64:$FRC, f64:$FRB))]>;
+ defm FMADDS : AForm_1r<59, 29,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fma f32:$FRA, f32:$FRC, f32:$FRB))]>;
+ defm FMSUB : AForm_1r<63, 28,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT,
+ (fma f64:$FRA, f64:$FRC, (fneg f64:$FRB)))]>;
+ defm FMSUBS : AForm_1r<59, 28,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT,
+ (fma f32:$FRA, f32:$FRC, (fneg f32:$FRB)))]>;
+ defm FNMADD : AForm_1r<63, 31,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmadd", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT,
+ (fneg (fma f64:$FRA, f64:$FRC, f64:$FRB)))]>;
+ defm FNMADDS : AForm_1r<59, 31,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmadds", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT,
+ (fneg (fma f32:$FRA, f32:$FRC, f32:$FRB)))]>;
+ defm FNMSUB : AForm_1r<63, 30,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fnmsub", "$FRT, $FRA, $FRC, $FRB", IIC_FPFused,
+ [(set f64:$FRT, (fneg (fma f64:$FRA, f64:$FRC,
+ (fneg f64:$FRB))))]>;
+ defm FNMSUBS : AForm_1r<59, 30,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fnmsubs", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fneg (fma f32:$FRA, f32:$FRC,
+ (fneg f32:$FRB))))]>;
+} // isCommutable
+}
+// FSEL is artificially split into 4 and 8-byte forms for the result. To avoid
+// having 4 of these, force the comparison to always be an 8-byte double (code
+// should use an FMRSD if the input comparison value really wants to be a float)
+// and 4/8 byte forms for the result and operand type..
+let Interpretation64Bit = 1, isCodeGenOnly = 1 in
+defm FSELD : AForm_1r<63, 23,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC, f8rc:$FRB),
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f64:$FRT, (PPCfsel f64:$FRA, f64:$FRC, f64:$FRB))]>;
+defm FSELS : AForm_1r<63, 23,
+ (outs f4rc:$FRT), (ins f8rc:$FRA, f4rc:$FRC, f4rc:$FRB),
+ "fsel", "$FRT, $FRA, $FRC, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (PPCfsel f64:$FRA, f32:$FRC, f32:$FRB))]>;
+let Uses = [RM] in {
+ let isCommutable = 1 in {
+ defm FADD : AForm_2r<63, 21,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fadd", "$FRT, $FRA, $FRB", IIC_FPAddSub,
+ [(set f64:$FRT, (fadd f64:$FRA, f64:$FRB))]>;
+ defm FADDS : AForm_2r<59, 21,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fadds", "$FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fadd f32:$FRA, f32:$FRB))]>;
+ } // isCommutable
+ defm FDIV : AForm_2r<63, 18,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fdiv", "$FRT, $FRA, $FRB", IIC_FPDivD,
+ [(set f64:$FRT, (fdiv f64:$FRA, f64:$FRB))]>;
+ defm FDIVS : AForm_2r<59, 18,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fdivs", "$FRT, $FRA, $FRB", IIC_FPDivS,
+ [(set f32:$FRT, (fdiv f32:$FRA, f32:$FRB))]>;
+ let isCommutable = 1 in {
+ defm FMUL : AForm_3r<63, 25,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRC),
+ "fmul", "$FRT, $FRA, $FRC", IIC_FPFused,
+ [(set f64:$FRT, (fmul f64:$FRA, f64:$FRC))]>;
+ defm FMULS : AForm_3r<59, 25,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRC),
+ "fmuls", "$FRT, $FRA, $FRC", IIC_FPGeneral,
+ [(set f32:$FRT, (fmul f32:$FRA, f32:$FRC))]>;
+ } // isCommutable
+ defm FSUB : AForm_2r<63, 20,
+ (outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB),
+ "fsub", "$FRT, $FRA, $FRB", IIC_FPAddSub,
+ [(set f64:$FRT, (fsub f64:$FRA, f64:$FRB))]>;
+ defm FSUBS : AForm_2r<59, 20,
+ (outs f4rc:$FRT), (ins f4rc:$FRA, f4rc:$FRB),
+ "fsubs", "$FRT, $FRA, $FRB", IIC_FPGeneral,
+ [(set f32:$FRT, (fsub f32:$FRA, f32:$FRB))]>;
+ }
+}
+
+let neverHasSideEffects = 1 in {
+let PPC970_Unit = 1 in { // FXU Operations.
+ let isSelect = 1 in
+ def ISEL : AForm_4<31, 15,
+ (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
+ "isel $rT, $rA, $rB, $cond", IIC_IntGeneral,
+ []>;
+}
+
+let PPC970_Unit = 1 in { // FXU Operations.
+// M-Form instructions. rotate and mask instructions.
+//
+let isCommutable = 1 in {
+// RLWIMI can be commuted if the rotate amount is zero.
+defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
+ (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+ IIC_IntRotate, []>, PPC970_DGroup_Cracked,
+ RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+}
+let BaseName = "rlwinm" in {
+def RLWINM : MForm_2<21,
+ (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ []>, RecFormRel;
+let Defs = [CR0] in
+def RLWINMo : MForm_2<21,
+ (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ []>, isDOT, RecFormRel, PPC970_DGroup_Cracked;
+}
+defm RLWNM : MForm_2r<23, (outs gprc:$rA),
+ (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
+ "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
+ []>;
+}
+} // neverHasSideEffects = 1
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instruction Patterns
+//
+
+// Arbitrary immediate support. Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+ (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;
+
+// Implement the 'not' operation with the NOR instruction.
+def i32not : OutPatFrag<(ops node:$in),
+ (NOR $in, $in)>;
+def : Pat<(not i32:$in),
+ (i32not $in)>;
+
+// ADD an arbitrary immediate.
+def : Pat<(add i32:$in, imm:$imm),
+ (ADDIS (ADDI $in, (LO16 imm:$imm)), (HA16 imm:$imm))>;
+// OR an arbitrary immediate.
+def : Pat<(or i32:$in, imm:$imm),
+ (ORIS (ORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// XOR an arbitrary immediate.
+def : Pat<(xor i32:$in, imm:$imm),
+ (XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
+// SUBFIC
+def : Pat<(sub imm32SExt16:$imm, i32:$in),
+ (SUBFIC $in, imm:$imm)>;
+
+// SHL/SRL
+def : Pat<(shl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, (SHL32 imm:$imm))>;
+def : Pat<(srl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, (SRL32 imm:$imm), imm:$imm, 31)>;
+
+// ROTL
+def : Pat<(rotl i32:$in, i32:$sh),
+ (RLWNM $in, $sh, 0, 31)>;
+def : Pat<(rotl i32:$in, (i32 imm:$imm)),
+ (RLWINM $in, imm:$imm, 0, 31)>;
+
+// RLWNM
+def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
+ (RLWNM $in, $sh, (MB maskimm32:$imm), (ME maskimm32:$imm))>;
+
+// Calls
+def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
+ (BL tglobaladdr:$dst)>;
+def : Pat<(PPCcall (i32 texternalsym:$dst)),
+ (BL texternalsym:$dst)>;
+
+
+def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
+ (TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm),
+ (TCRETURNdi texternalsym:$dst, imm:$imm)>;
+
+def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm),
+ (TCRETURNri CTRRC:$dst, imm:$imm)>;
+
+
+
+// Hi and Lo for Darwin Global Addresses.
+def : Pat<(PPChi tglobaladdr:$in, 0), (LIS tglobaladdr:$in)>;
+def : Pat<(PPClo tglobaladdr:$in, 0), (LI tglobaladdr:$in)>;
+def : Pat<(PPChi tconstpool:$in, 0), (LIS tconstpool:$in)>;
+def : Pat<(PPClo tconstpool:$in, 0), (LI tconstpool:$in)>;
+def : Pat<(PPChi tjumptable:$in, 0), (LIS tjumptable:$in)>;
+def : Pat<(PPClo tjumptable:$in, 0), (LI tjumptable:$in)>;
+def : Pat<(PPChi tblockaddress:$in, 0), (LIS tblockaddress:$in)>;
+def : Pat<(PPClo tblockaddress:$in, 0), (LI tblockaddress:$in)>;
+def : Pat<(PPChi tglobaltlsaddr:$g, i32:$in),
+ (ADDIS $in, tglobaltlsaddr:$g)>;
+def : Pat<(PPClo tglobaltlsaddr:$g, i32:$in),
+ (ADDI $in, tglobaltlsaddr:$g)>;
+def : Pat<(add i32:$in, (PPChi tglobaladdr:$g, 0)),
+ (ADDIS $in, tglobaladdr:$g)>;
+def : Pat<(add i32:$in, (PPChi tconstpool:$g, 0)),
+ (ADDIS $in, tconstpool:$g)>;
+def : Pat<(add i32:$in, (PPChi tjumptable:$g, 0)),
+ (ADDIS $in, tjumptable:$g)>;
+def : Pat<(add i32:$in, (PPChi tblockaddress:$g, 0)),
+ (ADDIS $in, tblockaddress:$g)>;
+
+// Support for thread-local storage.
+def PPC32GOT: Pseudo<(outs gprc:$rD), (ins), "#PPC32GOT",
+ [(set i32:$rD, (PPCppc32GOT))]>;
+
+// Get the _GLOBAL_OFFSET_TABLE_ in PIC mode.
+// This uses two output registers, the first as the real output, the second as a
+// temporary register, used internally in code generation.
+def PPC32PICGOT: Pseudo<(outs gprc:$rD, gprc:$rT), (ins), "#PPC32PICGOT",
+ []>, NoEncode<"$rT">;
+
+def LDgotTprelL32: Pseudo<(outs gprc:$rD), (ins s16imm:$disp, gprc_nor0:$reg),
+ "#LDgotTprelL32",
+ [(set i32:$rD,
+ (PPCldGotTprelL tglobaltlsaddr:$disp, i32:$reg))]>;
+def : Pat<(PPCaddTls i32:$in, tglobaltlsaddr:$g),
+ (ADD4TLS $in, tglobaltlsaddr:$g)>;
+
+def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsgdL32",
+ [(set i32:$rD,
+ (PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
+def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "#GETtlsADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
+def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDItlsldL32",
+ [(set i32:$rD,
+ (PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
+def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
+ "#GETtlsldADDR32",
+ [(set i32:$rD,
+ (PPCgetTlsldAddr i32:$reg, tglobaltlsaddr:$sym))]>;
+def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDIdtprelL32",
+ [(set i32:$rD,
+ (PPCaddiDtprelL i32:$reg, tglobaltlsaddr:$disp))]>;
+def ADDISdtprelHA32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
+ "#ADDISdtprelHA32",
+ [(set i32:$rD,
+ (PPCaddisDtprelHA i32:$reg,
+ tglobaltlsaddr:$disp))]>;
+
+// Support for Position-independent code
+def LWZtoc: Pseudo<(outs gprc:$rD), (ins tocentry32:$disp, gprc:$reg),
+ "#LWZtoc",
+ [(set i32:$rD,
+ (PPCtoc_entry tglobaladdr:$disp, i32:$reg))]>;
+// Get Global (GOT) Base Register offset, from the word immediately preceding
+// the function label.
+def GetGBRO: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#GetGBRO", []>;
+// Update the Global(GOT) Base Register with the above offset.
+def UpdateGBR: Pseudo<(outs gprc:$rT), (ins gprc:$rI), "#UpdateGBR", []>;
+
+
+// Standard shifts. These are represented separately from the real shifts above
+// so that we can distinguish between shifts that allow 5-bit and 6-bit shift
+// amounts.
+def : Pat<(sra i32:$rS, i32:$rB),
+ (SRAW $rS, $rB)>;
+def : Pat<(srl i32:$rS, i32:$rB),
+ (SRW $rS, $rB)>;
+def : Pat<(shl i32:$rS, i32:$rB),
+ (SLW $rS, $rB)>;
+
+def : Pat<(zextloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(zextloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi1 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src),
+ (LBZ iaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src),
+ (LBZX xaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src),
+ (LHZ iaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src),
+ (LHZX xaddr:$src)>;
+def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (LFS iaddr:$src), F8RC)>;
+def : Pat<(f64 (extloadf32 xaddr:$src)),
+ (COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
+
+def : Pat<(f64 (fextend f32:$src)),
+ (COPY_TO_REGCLASS $src, F8RC)>;
+
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>, Requires<[IsNotBookE]>;
+def : Pat<(atomic_fence (imm), (imm)), (MSYNC)>, Requires<[IsBookE]>;
+
+// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (FNMSUB $A, $C, $B)>;
+def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
+ (FNMSUBS $A, $C, $B)>;
+
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign f64:$frB, f32:$frA),
+ (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>;
+def : Pat<(fcopysign f32:$frB, f64:$frA),
+ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
+
+include "PPCInstrAltivec.td"
+include "PPCInstr64Bit.td"
+include "PPCInstrVSX.td"
+
+def crnot : OutPatFrag<(ops node:$in),
+ (CRNOR $in, $in)>;
+def : Pat<(not i1:$in),
+ (crnot $in)>;
+
+// Patterns for arithmetic i1 operations.
+def : Pat<(add i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(sub i1:$a, i1:$b),
+ (CRXOR $a, $b)>;
+def : Pat<(mul i1:$a, i1:$b),
+ (CRAND $a, $b)>;
+
+// We're sometimes asked to materialize i1 -1, which is just 1 in this case
+// (-1 is used to mean all bits set).
+def : Pat<(i1 -1), (CRSET)>;
+
+// i1 extensions, implemented in terms of isel.
+def : Pat<(i32 (zext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i32 (sext i1:$in)),
+ (SELECT_I4 $in, (LI -1), (LI 0))>;
+
+def : Pat<(i64 (zext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+def : Pat<(i64 (sext i1:$in)),
+ (SELECT_I8 $in, (LI8 -1), (LI8 0))>;
+
+// FIXME: We should choose either a zext or a sext based on other constants
+// already around.
+def : Pat<(i32 (anyext i1:$in)),
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
+def : Pat<(i64 (anyext i1:$in)),
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
+
+// match setcc on i1 variables.
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLT)),
+ (CRANDC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULT)),
+ (CRANDC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETLE)),
+ (CRORC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETULE)),
+ (CRORC $s2, $s1)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETEQ)),
+ (CREQV $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGE)),
+ (CRORC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGE)),
+ (CRORC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETGT)),
+ (CRANDC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETUGT)),
+ (CRANDC $s1, $s2)>;
+def : Pat<(i1 (setcc i1:$s1, i1:$s2, SETNE)),
+ (CRXOR $s1, $s2)>;
+
+// match setcc on non-i1 (non-vector) variables. Note that SETUEQ, SETOGE,
+// SETOLE, SETONE, SETULT and SETUGT should be expanded by legalize for
+// floating-point types.
+
+multiclass CRNotPat<dag pattern, dag result> {
+ def : Pat<pattern, (crnot result)>;
+ def : Pat<(not pattern), result>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i32 (zext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+ def : Pat<(i32 (sext pattern)),
+ (SELECT_I4 result, (LI 0), (LI -1))>;
+
+ // We can also fold the crnot into an extension:
+ def : Pat<(i64 (zext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+ def : Pat<(i64 (sext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 -1))>;
+
+ // FIXME: We should choose either a zext or a sext based on other constants
+ // already around.
+ def : Pat<(i32 (anyext pattern)),
+ (SELECT_I4 result, (LI 0), (LI 1))>;
+
+ def : Pat<(i64 (anyext pattern)),
+ (SELECT_I8 result, (LI8 0), (LI8 1))>;
+}
+
+// FIXME: Because of what seems like a bug in TableGen's type-inference code,
+// we need to write imm:$imm in the output patterns below, not just $imm, or
+// else the resulting matcher will not correctly add the immediate operand
+// (making it a register operand instead).
+
+// extended SETCC.
+multiclass ExtSetCCPat<CondCode cc, PatFrag pfrag,
+ OutPatFrag rfrag, OutPatFrag rfrag8> {
+ def : Pat<(i32 (zext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (zext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (zext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+
+ def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (rfrag $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (rfrag8 $s1)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1), sub_32)>;
+ def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1), sub_32)>;
+}
+
+// Note that we do all inversions below with i(32|64)not, instead of using
+// (xori x, 1) because on the A2 nor has single-cycle latency while xori
+// has 2-cycle latency.
+
+defm : ExtSetCCPat<SETEQ,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (CNTLZW $in), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (CNTLZD $in), 58, 63)> >;
+
+defm : ExtSetCCPat<SETNE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not (CNTLZW $in)), 27, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not (CNTLZD $in)), 58, 63)> >;
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ANDC (NEG $in), $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ANDC8 (NEG8 $in), $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, 0, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (ORC $in, (NEG $in)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (ORC8 $in, (NEG8 $in)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (AND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (AND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (NAND $in, (ADDI $in, 1)), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (NAND8 $in, (ADDI8 $in, 1)), 1, 63)> >;
+
+defm : ExtSetCCPat<SETGT,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM (i32not $in), 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL (i64not $in), 1, 63)> >;
+
+defm : ExtSetCCPat<SETLE,
+ PatFrag<(ops node:$in, node:$cc),
+ (setcc $in, -1, $cc)>,
+ OutPatFrag<(ops node:$in),
+ (RLWINM $in, 1, 31, 31)>,
+ OutPatFrag<(ops node:$in),
+ (RLDICL $in, 1, 63)> >;
+
+// SETCC for i32.
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+// For non-equality comparisons, the default code would materialize the
+// constant, then compare against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpw cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmplwi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i32:$s1, imm:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, imm32SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPWI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, imm:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLWI (XORIS $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i32:$s1, i32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i32:$s1, i32:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPW $s1, $s2), sub_eq)>;
+
+// SETCC for i64.
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGT)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGT)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+def : Pat<(i1 (setcc i64:$s1, immZExt16:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+// For non-equality comparisons, the default code would materialize the
+// constant, then compare against it, like this:
+// lis r2, 4660
+// ori r2, r2, 22136
+// cmpd cr0, r3, r2
+// beq cr0,L6
+// Since we are just comparing for equality, we can emit this instead:
+// xoris r0,r3,0x1234
+// cmpldi cr0,r0,0x5678
+// beq cr0,L6
+
+def : Pat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETEQ)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETUGE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETGE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETULE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETLE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64SExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPDI $s1, imm:$imm), sub_eq)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, immZExt16:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI $s1, imm:$imm), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, imm64ZExt32:$imm, SETNE)),
+ (EXTRACT_SUBREG (CMPLDI (XORIS8 $s1, (HI16 imm:$imm)),
+ (LO16 imm:$imm)), sub_eq)>;
+
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETULT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETLT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETUGT)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETGT)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc i64:$s1, i64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETGE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETULE)),
+ (EXTRACT_SUBREG (CMPLD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETLE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
+ (EXTRACT_SUBREG (CMPD $s1, $s2), sub_eq)>;
+
+// SETCC for f32.
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f32:$s1, f32:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f32:$s1, f32:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUS $s1, $s2), sub_un)>;
+
+// SETCC for f64.
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETLT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETGT)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETOEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETEQ)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+def : Pat<(i1 (setcc f64:$s1, f64:$s2, SETUO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETGE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_lt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETULE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETLE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_gt)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETUNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETNE)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_eq)>;
+defm : CRNotPat<(i1 (setcc f64:$s1, f64:$s2, SETO)),
+ (EXTRACT_SUBREG (FCMPUD $s1, $s2), sub_un)>;
+
+// match select on i1 variables:
+def : Pat<(i1 (select i1:$cond, i1:$tval, i1:$fval)),
+ (CROR (CRAND $cond , $tval),
+ (CRAND (crnot $cond), $fval))>;
+
+// match selectcc on i1 variables:
+// select (lhs == rhs), tval, fval is:
+// ((lhs == rhs) & tval) | (!(lhs == rhs) & fval)
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLT)),
+ (CROR (CRAND (CRANDC $rhs, $lhs), $tval),
+ (CRAND (CRORC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETLE)),
+ (CROR (CRAND (CRORC $rhs, $lhs), $tval),
+ (CRAND (CRANDC $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETEQ)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $tval),
+ (CRAND (CRXOR $lhs, $rhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGE)),
+ (CROR (CRAND (CRORC $lhs, $rhs), $tval),
+ (CRAND (CRANDC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETGT)),
+ (CROR (CRAND (CRANDC $lhs, $rhs), $tval),
+ (CRAND (CRORC $rhs, $lhs), $fval))>;
+def : Pat <(i1 (selectcc i1:$lhs, i1:$rhs, i1:$tval, i1:$fval, SETNE)),
+ (CROR (CRAND (CREQV $lhs, $rhs), $fval),
+ (CRAND (CRXOR $lhs, $rhs), $tval))>;
+
+// match selectcc on i1 variables with non-i1 output.
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLT)),
+ (SELECT_I4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETLE)),
+ (SELECT_I4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETEQ)),
+ (SELECT_I4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGE)),
+ (SELECT_I4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETGT)),
+ (SELECT_I4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i32 (selectcc i1:$lhs, i1:$rhs, i32:$tval, i32:$fval, SETNE)),
+ (SELECT_I4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLT)),
+ (SELECT_I8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETLE)),
+ (SELECT_I8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETEQ)),
+ (SELECT_I8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGE)),
+ (SELECT_I8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETGT)),
+ (SELECT_I8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(i64 (selectcc i1:$lhs, i1:$rhs, i64:$tval, i64:$fval, SETNE)),
+ (SELECT_I8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
+ (SELECT_F4 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLE)),
+ (SELECT_F4 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETEQ)),
+ (SELECT_F4 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGE)),
+ (SELECT_F4 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETGT)),
+ (SELECT_F4 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
+ (SELECT_F4 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLT)),
+ (SELECT_F8 (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETLE)),
+ (SELECT_F8 (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETEQ)),
+ (SELECT_F8 (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGE)),
+ (SELECT_F8 (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETGT)),
+ (SELECT_F8 (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(f64 (selectcc i1:$lhs, i1:$rhs, f64:$tval, f64:$fval, SETNE)),
+ (SELECT_F8 (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLT)),
+ (SELECT_VRRC (CRANDC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETLE)),
+ (SELECT_VRRC (CRORC $rhs, $lhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETEQ)),
+ (SELECT_VRRC (CREQV $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGE)),
+ (SELECT_VRRC (CRORC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETGT)),
+ (SELECT_VRRC (CRANDC $lhs, $rhs), $tval, $fval)>;
+def : Pat<(v4i32 (selectcc i1:$lhs, i1:$rhs, v4i32:$tval, v4i32:$fval, SETNE)),
+ (SELECT_VRRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+
+let usesCustomInserter = 1 in {
+def ANDIo_1_EQ_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_EQ_BIT",
+ [(set i1:$dst, (trunc (not i32:$in)))]>;
+def ANDIo_1_GT_BIT : Pseudo<(outs crbitrc:$dst), (ins gprc:$in),
+ "#ANDIo_1_GT_BIT",
+ [(set i1:$dst, (trunc i32:$in))]>;
+
+def ANDIo_1_EQ_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_EQ_BIT8",
+ [(set i1:$dst, (trunc (not i64:$in)))]>;
+def ANDIo_1_GT_BIT8 : Pseudo<(outs crbitrc:$dst), (ins g8rc:$in),
+ "#ANDIo_1_GT_BIT8",
+ [(set i1:$dst, (trunc i64:$in))]>;
+}
+
+def : Pat<(i1 (not (trunc i32:$in))),
+ (ANDIo_1_EQ_BIT $in)>;
+def : Pat<(i1 (not (trunc i64:$in))),
+ (ANDIo_1_EQ_BIT8 $in)>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Instructions used for assembler/disassembler only
+//
+
+def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
+ "isync", IIC_SprISYNC, []>;
+
+def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
+ "icbi $src", IIC_LdStICBI, []>;
+
+def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
+ "eieio", IIC_LdStLoad, []>;
+
+def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
+ "wait $L", IIC_LdStLoad, []>;
+
+def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsr $RS, $L", IIC_SprMTMSR>;
+
+def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
+ "mfmsr $RT", IIC_SprMFMSR, []>;
+
+def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsrd $RS, $L", IIC_SprMTMSRD>;
+
+def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
+ "slbie $RB", IIC_SprSLBIE, []>;
+
+def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
+ "slbmte $RS, $RB", IIC_SprSLBMTE, []>;
+
+def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
+ "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>;
+
+def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
+
+def TLBSYNC : XForm_0<31, 566, (outs), (ins),
+ "tlbsync", IIC_SprTLBSYNC, []>;
+
+def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
+ "tlbiel $RB", IIC_SprTLBIEL, []>;
+
+def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
+ "tlbie $RB,$RS", IIC_SprTLBIE, []>;
+
+//===----------------------------------------------------------------------===//
+// PowerPC Assembler Instruction Aliases
+//
+
+// Pseudo-instructions for alternate assembly syntax (never used by codegen).
+// These are aliases that require C++ handling to convert to the target
+// instruction, while InstAliases can be handled directly by tblgen.
+class PPCAsmPseudo<string asm, dag iops>
+ : Instruction {
+ let Namespace = "PPC";
+ bit PPC64 = 0; // Default value, override with isPPC64
+
+ let OutOperandList = (outs);
+ let InOperandList = iops;
+ let Pattern = [];
+ let AsmString = asm;
+ let isAsmParserOnly = 1;
+ let isPseudo = 1;
+}
+
+def : InstAlias<"sc", (SC 0)>;
+
+def : InstAlias<"sync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"msync", (SYNC 0)>, Requires<[IsNotBookE]>;
+def : InstAlias<"lwsync", (SYNC 1)>, Requires<[IsNotBookE]>;
+def : InstAlias<"ptesync", (SYNC 2)>, Requires<[IsNotBookE]>;
+
+def : InstAlias<"wait", (WAIT 0)>;
+def : InstAlias<"waitrsv", (WAIT 1)>;
+def : InstAlias<"waitimpl", (WAIT 2)>;
+
+def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
+def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
+def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+
+def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
+def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+
+def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+
+def : InstAlias<"xnop", (XORI R0, R0, 0)>;
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+
+def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
+
+def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+
+def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+
+def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
+def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
+
+def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
+
+def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
+def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
+
+def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
+
+def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
+def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
+
+def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
+def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
+def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
+def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
+
+def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+
+def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+
+def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+
+def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
+def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
+
+def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+
+// These generic branch instruction forms are used for the assembler parser only.
+// Defs and Uses are conservative, since we don't know the BO value.
+let PPC970_Unit = 7 in {
+ let Defs = [CTR], Uses = [CTR, RM] in {
+ def gBC : BForm_3<16, 0, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
+ "bc $bo, $bi, $dst">;
+ def gBCA : BForm_3<16, 1, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
+ "bca $bo, $bi, $dst">;
+ }
+ let Defs = [LR, CTR], Uses = [CTR, RM] in {
+ def gBCL : BForm_3<16, 0, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
+ "bcl $bo, $bi, $dst">;
+ def gBCLA : BForm_3<16, 1, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
+ "bcla $bo, $bi, $dst">;
+ }
+ let Defs = [CTR], Uses = [CTR, LR, RM] in
+ def gBCLR : XLForm_2<19, 16, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bclr $bo, $bi, $bh", IIC_BrB, []>;
+ let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
+ def gBCLRL : XLForm_2<19, 16, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bclrl $bo, $bi, $bh", IIC_BrB, []>;
+ let Defs = [CTR], Uses = [CTR, LR, RM] in
+ def gBCCTR : XLForm_2<19, 528, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bcctr $bo, $bi, $bh", IIC_BrB, []>;
+ let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
+ def gBCCTRL : XLForm_2<19, 528, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bcctrl $bo, $bi, $bh", IIC_BrB, []>;
+}
+def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>;
+
+multiclass BranchSimpleMnemonic1<string name, string pm, int bo> {
+ def : InstAlias<"b"#name#pm#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"a"#pm#" $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"lr"#pm#" $bi", (gBCLR bo, crbitrc:$bi, 0)>;
+ def : InstAlias<"b"#name#"l"#pm#" $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"la"#pm#" $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"lrl"#pm#" $bi", (gBCLRL bo, crbitrc:$bi, 0)>;
+}
+multiclass BranchSimpleMnemonic2<string name, string pm, int bo>
+ : BranchSimpleMnemonic1<name, pm, bo> {
+ def : InstAlias<"b"#name#"ctr"#pm#" $bi", (gBCCTR bo, crbitrc:$bi, 0)>;
+ def : InstAlias<"b"#name#"ctrl"#pm#" $bi", (gBCCTRL bo, crbitrc:$bi, 0)>;
+}
+defm : BranchSimpleMnemonic2<"t", "", 12>;
+defm : BranchSimpleMnemonic2<"f", "", 4>;
+defm : BranchSimpleMnemonic2<"t", "-", 14>;
+defm : BranchSimpleMnemonic2<"f", "-", 6>;
+defm : BranchSimpleMnemonic2<"t", "+", 15>;
+defm : BranchSimpleMnemonic2<"f", "+", 7>;
+defm : BranchSimpleMnemonic1<"dnzt", "", 8>;
+defm : BranchSimpleMnemonic1<"dnzf", "", 0>;
+defm : BranchSimpleMnemonic1<"dzt", "", 10>;
+defm : BranchSimpleMnemonic1<"dzf", "", 2>;
+
+multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
+ def : InstAlias<"b"#name#pm#" $cc, $dst",
+ (BCC bibo, crrc:$cc, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#pm#" $dst",
+ (BCC bibo, CR0, condbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"a"#pm#" $cc, $dst",
+ (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"a"#pm#" $dst",
+ (BCCA bibo, CR0, abscondbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"lr"#pm#" $cc",
+ (BCCLR bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"lr"#pm,
+ (BCCLR bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"ctr"#pm#" $cc",
+ (BCCCTR bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"ctr"#pm,
+ (BCCCTR bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"l"#pm#" $cc, $dst",
+ (BCCL bibo, crrc:$cc, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"l"#pm#" $dst",
+ (BCCL bibo, CR0, condbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"la"#pm#" $cc, $dst",
+ (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"la"#pm#" $dst",
+ (BCCLA bibo, CR0, abscondbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"lrl"#pm#" $cc",
+ (BCCLRL bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"lrl"#pm,
+ (BCCLRL bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"ctrl"#pm#" $cc",
+ (BCCCTRL bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"ctrl"#pm,
+ (BCCCTRL bibo, CR0)>;
+}
+multiclass BranchExtendedMnemonic<string name, int bibo> {
+ defm : BranchExtendedMnemonicPM<name, "", bibo>;
+ defm : BranchExtendedMnemonicPM<name, "-", !add(bibo, 2)>;
+ defm : BranchExtendedMnemonicPM<name, "+", !add(bibo, 3)>;
+}
+defm : BranchExtendedMnemonic<"lt", 12>;
+defm : BranchExtendedMnemonic<"gt", 44>;
+defm : BranchExtendedMnemonic<"eq", 76>;
+defm : BranchExtendedMnemonic<"un", 108>;
+defm : BranchExtendedMnemonic<"so", 108>;
+defm : BranchExtendedMnemonic<"ge", 4>;
+defm : BranchExtendedMnemonic<"nl", 4>;
+defm : BranchExtendedMnemonic<"le", 36>;
+defm : BranchExtendedMnemonic<"ng", 36>;
+defm : BranchExtendedMnemonic<"ne", 68>;
+defm : BranchExtendedMnemonic<"nu", 100>;
+defm : BranchExtendedMnemonic<"ns", 100>;
+
+def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm64:$imm)>;
+def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm64:$imm)>;
+def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>;
+
+def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm64:$imm)>;
+def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm64:$imm)>;
+def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+
+multiclass TrapExtendedMnemonic<string name, int to> {
+ def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>;
+ def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>;
+ def : InstAlias<"tw"#name#"i $rA, $imm", (TWI to, gprc:$rA, s16imm:$imm)>;
+ def : InstAlias<"tw"#name#" $rA, $rB", (TW to, gprc:$rA, gprc:$rB)>;
+}
+defm : TrapExtendedMnemonic<"lt", 16>;
+defm : TrapExtendedMnemonic<"le", 20>;
+defm : TrapExtendedMnemonic<"eq", 4>;
+defm : TrapExtendedMnemonic<"ge", 12>;
+defm : TrapExtendedMnemonic<"gt", 8>;
+defm : TrapExtendedMnemonic<"nl", 12>;
+defm : TrapExtendedMnemonic<"ne", 24>;
+defm : TrapExtendedMnemonic<"ng", 20>;
+defm : TrapExtendedMnemonic<"llt", 2>;
+defm : TrapExtendedMnemonic<"lle", 6>;
+defm : TrapExtendedMnemonic<"lge", 5>;
+defm : TrapExtendedMnemonic<"lgt", 1>;
+defm : TrapExtendedMnemonic<"lnl", 5>;
+defm : TrapExtendedMnemonic<"lng", 6>;
+defm : TrapExtendedMnemonic<"u", 31>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
new file mode 100644
index 000000000000..49bcc4876d33
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -0,0 +1,816 @@
+//===- PPCInstrVSX.td - The PowerPC VSX Extension --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the VSX extension to the PowerPC instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+def PPCRegVSRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsrc : RegisterOperand<VSRC> {
+ let ParserMatchClass = PPCRegVSRCAsmOperand;
+}
+
+def PPCRegVSFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVSFRC"; let PredicateMethod = "isVSRegNumber";
+}
+def vsfrc : RegisterOperand<VSFRC> {
+ let ParserMatchClass = PPCRegVSFRCAsmOperand;
+}
+
+multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>;
+ let Defs = [CR6] in
+ def o : XX3Form_Rc<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT;
+ }
+}
+
+def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
+let Predicates = [HasVSX] in {
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
+let Uses = [RM] in {
+
+ // Load indexed instructions
+ let mayLoad = 1, canFoldAsLoad = 1 in {
+ def LXSDX : XForm_1<31, 588,
+ (outs vsfrc:$XT), (ins memrr:$src),
+ "lxsdx $XT, $src", IIC_LdStLFD,
+ [(set f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVD2X : XForm_1<31, 844,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvd2x $XT, $src", IIC_LdStLFD,
+ [(set v2f64:$XT, (load xoaddr:$src))]>;
+
+ def LXVDSX : XForm_1<31, 332,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvdsx $XT, $src", IIC_LdStLFD, []>;
+
+ def LXVW4X : XForm_1<31, 780,
+ (outs vsrc:$XT), (ins memrr:$src),
+ "lxvw4x $XT, $src", IIC_LdStLFD, []>;
+ }
+
+ // Store indexed instructions
+ let mayStore = 1 in {
+ def STXSDX : XX1Form<31, 716,
+ (outs), (ins vsfrc:$XT, memrr:$dst),
+ "stxsdx $XT, $dst", IIC_LdStSTFD,
+ [(store f64:$XT, xoaddr:$dst)]>;
+
+ def STXVD2X : XX1Form<31, 972,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
+
+ def STXVW4X : XX1Form<31, 908,
+ (outs), (ins vsrc:$XT, memrr:$dst),
+ "stxvw4x $XT, $dst", IIC_LdStSTFD, []>;
+ }
+
+ // Add/Mul Instructions
+ let isCommutable = 1 in {
+ def XSADDDP : XX3Form<60, 32,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fadd f64:$XA, f64:$XB))]>;
+ def XSMULDP : XX3Form<60, 48,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fmul f64:$XA, f64:$XB))]>;
+
+ def XVADDDP : XX3Form<60, 96,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvadddp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fadd v2f64:$XA, v2f64:$XB))]>;
+
+ def XVADDSP : XX3Form<60, 64,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvaddsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fadd v4f32:$XA, v4f32:$XB))]>;
+
+ def XVMULDP : XX3Form<60, 112,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmuldp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fmul v2f64:$XA, v2f64:$XB))]>;
+
+ def XVMULSP : XX3Form<60, 80,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmulsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fmul v4f32:$XA, v4f32:$XB))]>;
+ }
+
+ // Subtract Instructions
+ def XSSUBDP : XX3Form<60, 40,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xssubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fsub f64:$XA, f64:$XB))]>;
+
+ def XVSUBDP : XX3Form<60, 104,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubdp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fsub v2f64:$XA, v2f64:$XB))]>;
+ def XVSUBSP : XX3Form<60, 72,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvsubsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fsub v4f32:$XA, v4f32:$XB))]>;
+
+ // FMA Instructions
+ let BaseName = "XSMADDADP" in {
+ let isCommutable = 1 in
+ def XSMADDADP : XX3Form<60, 33,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMADDMDP : XX3Form<60, 41,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSMSUBADP" in {
+ let isCommutable = 1 in
+ def XSMSUBADP : XX3Form<60, 49,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fma f64:$XA, f64:$XB, (fneg f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSMSUBMDP : XX3Form<60, 57,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMADDADP" in {
+ let isCommutable = 1 in
+ def XSNMADDADP : XX3Form<60, 161,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMADDMDP : XX3Form<60, 169,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XSNMSUBADP" in {
+ let isCommutable = 1 in
+ def XSNMSUBADP : XX3Form<60, 177,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fma f64:$XA, f64:$XB, (fneg f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XSNMSUBMDP : XX3Form<60, 185,
+ (outs vsfrc:$XT), (ins vsfrc:$XTi, vsfrc:$XA, vsfrc:$XB),
+ "xsnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDADP" in {
+ let isCommutable = 1 in
+ def XVMADDADP : XX3Form<60, 97,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMDP : XX3Form<60, 105,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMADDASP" in {
+ let isCommutable = 1 in
+ def XVMADDASP : XX3Form<60, 65,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMADDMSP : XX3Form<60, 73,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBADP" in {
+ let isCommutable = 1 in
+ def XVMSUBADP : XX3Form<60, 113,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMDP : XX3Form<60, 121,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVMSUBASP" in {
+ let isCommutable = 1 in
+ def XVMSUBASP : XX3Form<60, 81,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVMSUBMSP : XX3Form<60, 89,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDADP" in {
+ let isCommutable = 1 in
+ def XVNMADDADP : XX3Form<60, 225,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, v2f64:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMDP : XX3Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMADDASP" in {
+ let isCommutable = 1 in
+ def XVNMADDASP : XX3Form<60, 193,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, v4f32:$XTi)))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMADDMSP : XX3Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmaddmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBADP" in {
+ let isCommutable = 1 in
+ def XVNMSUBADP : XX3Form<60, 241,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubadp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fma v2f64:$XA, v2f64:$XB, (fneg v2f64:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMDP : XX3Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmdp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ let BaseName = "XVNMSUBASP" in {
+ let isCommutable = 1 in
+ def XVNMSUBASP : XX3Form<60, 209,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubasp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fma v4f32:$XA, v4f32:$XB, (fneg v4f32:$XTi))))]>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ let IsVSXFMAAlt = 1 in
+ def XVNMSUBMSP : XX3Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XTi, vsrc:$XA, vsrc:$XB),
+ "xvnmsubmsp $XT, $XA, $XB", IIC_VecFP, []>,
+ RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
+ AltVSXFMARel;
+ }
+
+ // Division Instructions
+ def XSDIVDP : XX3Form<60, 56,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set f64:$XT, (fdiv f64:$XA, f64:$XB))]>;
+ def XSSQRTDP : XX2Form<60, 75,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xssqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set f64:$XT, (fsqrt f64:$XB))]>;
+
+ def XSREDP : XX2Form<60, 90,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsredp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfre f64:$XB))]>;
+ def XSRSQRTEDP : XX2Form<60, 74,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
+
+ def XSTDIVDP : XX3Form_1<60, 61,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSTSQRTDP : XX2Form_1<60, 106,
+ (outs crrc:$crD), (ins vsfrc:$XB),
+ "xstsqrtdp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVDIVDP : XX3Form<60, 120,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivdp $XT, $XA, $XB", IIC_FPDivD,
+ [(set v2f64:$XT, (fdiv v2f64:$XA, v2f64:$XB))]>;
+ def XVDIVSP : XX3Form<60, 88,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvdivsp $XT, $XA, $XB", IIC_FPDivS,
+ [(set v4f32:$XT, (fdiv v4f32:$XA, v4f32:$XB))]>;
+
+ def XVSQRTDP : XX2Form<60, 203,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtdp $XT, $XB", IIC_FPSqrtD,
+ [(set v2f64:$XT, (fsqrt v2f64:$XB))]>;
+ def XVSQRTSP : XX2Form<60, 139,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvsqrtsp $XT, $XB", IIC_FPSqrtS,
+ [(set v4f32:$XT, (fsqrt v4f32:$XB))]>;
+
+ def XVTDIVDP : XX3Form_1<60, 125,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XVTDIVSP : XX3Form_1<60, 93,
+ (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ def XVTSQRTDP : XX2Form_1<60, 234,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtdp $crD, $XB", IIC_FPCompare, []>;
+ def XVTSQRTSP : XX2Form_1<60, 170,
+ (outs crrc:$crD), (ins vsrc:$XB),
+ "xvtsqrtsp $crD, $XB", IIC_FPCompare, []>;
+
+ def XVREDP : XX2Form<60, 218,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvredp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfre v2f64:$XB))]>;
+ def XVRESP : XX2Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvresp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfre v4f32:$XB))]>;
+
+ def XVRSQRTEDP : XX2Form<60, 202,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtedp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (PPCfrsqrte v2f64:$XB))]>;
+ def XVRSQRTESP : XX2Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrsqrtesp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (PPCfrsqrte v4f32:$XB))]>;
+
+ // Compare Instructions
+ def XSCMPODP : XX3Form_1<60, 43,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>;
+ def XSCMPUDP : XX3Form_1<60, 35,
+ (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+
+ defm XVCMPEQDP : XX3Form_Rcr<60, 99,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPEQSP : XX3Form_Rcr<60, 67,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpeqsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGEDP : XX3Form_Rcr<60, 115,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgedp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGESP : XX3Form_Rcr<60, 83,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgesp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTDP : XX3Form_Rcr<60, 107,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtdp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+ defm XVCMPGTSP : XX3Form_Rcr<60, 75,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcmpgtsp", "$XT, $XA, $XB", IIC_VecFPCompare, []>;
+
+ // Move Instructions
+ def XSABSDP : XX2Form<60, 345,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fabs f64:$XB))]>;
+ def XSNABSDP : XX2Form<60, 361,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnabsdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg (fabs f64:$XB)))]>;
+ def XSNEGDP : XX2Form<60, 377,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsnegdp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fneg f64:$XB))]>;
+ def XSCPSGNDP : XX3Form<60, 176,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set f64:$XT, (fcopysign f64:$XB, f64:$XA))]>;
+
+ def XVABSDP : XX2Form<60, 473,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fabs v2f64:$XB))]>;
+
+ def XVABSSP : XX2Form<60, 409,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fabs v4f32:$XB))]>;
+
+ def XVCPSGNDP : XX3Form<60, 240,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgndp $XT, $XA, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fcopysign v2f64:$XB, v2f64:$XA))]>;
+ def XVCPSGNSP : XX3Form<60, 208,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvcpsgnsp $XT, $XA, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fcopysign v4f32:$XB, v4f32:$XA))]>;
+
+ def XVNABSDP : XX2Form<60, 489,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabsdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg (fabs v2f64:$XB)))]>;
+ def XVNABSSP : XX2Form<60, 425,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnabssp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg (fabs v4f32:$XB)))]>;
+
+ def XVNEGDP : XX2Form<60, 505,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fneg v2f64:$XB))]>;
+ def XVNEGSP : XX2Form<60, 441,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvnegsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fneg v4f32:$XB))]>;
+
+ // Conversion Instructions
+ def XSCVDPSP : XX2Form<60, 265,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XSCVDPSXDS : XX2Form<60, 344,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctidz f64:$XB))]>;
+ def XSCVDPSXWS : XX2Form<60, 88,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpsxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+ def XSCVDPUXDS : XX2Form<60, 328,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxds $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+ def XSCVDPUXWS : XX2Form<60, 72,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvdpuxws $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+ def XSCVSPDP : XX2Form<60, 329,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvspdp $XT, $XB", IIC_VecFP, []>;
+ def XSCVSXDDP : XX2Form<60, 376,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvsxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfid f64:$XB))]>;
+ def XSCVUXDDP : XX2Form<60, 360,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xscvuxddp $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (PPCfcfidu f64:$XB))]>;
+
+ def XVCVDPSP : XX2Form<60, 393,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPSXDS : XX2Form<60, 472,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
+ def XVCVDPSXWS : XX2Form<60, 216,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVDPUXDS : XX2Form<60, 456,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxds $XT, $XB", IIC_VecFP,
+ [(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
+ def XVCVDPUXWS : XX2Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
+
+ def XVCVSPDP : XX2Form<60, 457,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXDS : XX2Form<60, 408,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPSXWS : XX2Form<60, 152,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXDS : XX2Form<60, 392,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxds $XT, $XB", IIC_VecFP, []>;
+ def XVCVSPUXWS : XX2Form<60, 136,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXDDP : XX2Form<60, 504,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
+ def XVCVSXDSP : XX2Form<60, 440,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWDP : XX2Form<60, 248,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVSXWSP : XX2Form<60, 184,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvsxwsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXDDP : XX2Form<60, 488,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxddp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
+ def XVCVUXDSP : XX2Form<60, 424,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWDP : XX2Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
+ def XVCVUXWSP : XX2Form<60, 168,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+
+ // Rounding Instructions
+ def XSRDPI : XX2Form<60, 73,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpi $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (frnd f64:$XB))]>;
+ def XSRDPIC : XX2Form<60, 107,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpic $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fnearbyint f64:$XB))]>;
+ def XSRDPIM : XX2Form<60, 121,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpim $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ffloor f64:$XB))]>;
+ def XSRDPIP : XX2Form<60, 105,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpip $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (fceil f64:$XB))]>;
+ def XSRDPIZ : XX2Form<60, 89,
+ (outs vsfrc:$XT), (ins vsfrc:$XB),
+ "xsrdpiz $XT, $XB", IIC_VecFP,
+ [(set f64:$XT, (ftrunc f64:$XB))]>;
+
+ def XVRDPI : XX2Form<60, 201,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpi $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (frnd v2f64:$XB))]>;
+ def XVRDPIC : XX2Form<60, 235,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpic $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fnearbyint v2f64:$XB))]>;
+ def XVRDPIM : XX2Form<60, 249,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpim $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ffloor v2f64:$XB))]>;
+ def XVRDPIP : XX2Form<60, 233,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpip $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (fceil v2f64:$XB))]>;
+ def XVRDPIZ : XX2Form<60, 217,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrdpiz $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (ftrunc v2f64:$XB))]>;
+
+ def XVRSPI : XX2Form<60, 137,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspi $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (frnd v4f32:$XB))]>;
+ def XVRSPIC : XX2Form<60, 171,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspic $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fnearbyint v4f32:$XB))]>;
+ def XVRSPIM : XX2Form<60, 185,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspim $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ffloor v4f32:$XB))]>;
+ def XVRSPIP : XX2Form<60, 169,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspip $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (fceil v4f32:$XB))]>;
+ def XVRSPIZ : XX2Form<60, 153,
+ (outs vsrc:$XT), (ins vsrc:$XB),
+ "xvrspiz $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (ftrunc v4f32:$XB))]>;
+
+ // Max/Min Instructions
+ let isCommutable = 1 in {
+ def XSMAXDP : XX3Form<60, 160,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XSMINDP : XX3Form<60, 168,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xsmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXDP : XX3Form<60, 224,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxdp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINDP : XX3Form<60, 232,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmindp $XT, $XA, $XB", IIC_VecFP, []>;
+
+ def XVMAXSP : XX3Form<60, 192,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvmaxsp $XT, $XA, $XB", IIC_VecFP, []>;
+ def XVMINSP : XX3Form<60, 200,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xvminsp $XT, $XA, $XB", IIC_VecFP, []>;
+ } // isCommutable
+} // Uses = [RM]
+
+ // Logical Instructions
+ let isCommutable = 1 in
+ def XXLAND : XX3Form<60, 130,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxland $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA, v4i32:$XB))]>;
+ def XXLANDC : XX3Form<60, 138,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlandc $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (and v4i32:$XA,
+ (vnot_ppc v4i32:$XB)))]>;
+ let isCommutable = 1 in {
+ def XXLNOR : XX3Form<60, 162,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlnor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (vnot_ppc (or v4i32:$XA,
+ v4i32:$XB)))]>;
+ def XXLOR : XX3Form<60, 146,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (or v4i32:$XA, v4i32:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XXLORf: XX3Form<60, 146,
+ (outs vsfrc:$XT), (ins vsfrc:$XA, vsfrc:$XB),
+ "xxlor $XT, $XA, $XB", IIC_VecGeneral, []>;
+ def XXLXOR : XX3Form<60, 154,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxlxor $XT, $XA, $XB", IIC_VecGeneral,
+ [(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
+ } // isCommutable
+
+ // Permutation Instructions
+ def XXMRGHW : XX3Form<60, 18,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrghw $XT, $XA, $XB", IIC_VecPerm, []>;
+ def XXMRGLW : XX3Form<60, 50,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
+ "xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
+
+ def XXPERMDI : XX3Form_2<60, 10,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
+ "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ def XXSEL : XX4Form<60, 3,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
+ "xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
+
+ def XXSLDWI : XX3Form_2<60, 2,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
+ "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm, []>;
+ def XXSPLTW : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+} // neverHasSideEffects
+} // AddedComplexity
+
+def : InstAlias<"xvmovdp $XT, $XB",
+ (XVCPSGNDP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+def : InstAlias<"xvmovsp $XT, $XB",
+ (XVCPSGNSP vsrc:$XT, vsrc:$XB, vsrc:$XB)>;
+
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 3)>;
+def : InstAlias<"xxmrghd $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 0)>;
+def : InstAlias<"xxmrgld $XT, $XA, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+
+let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+def : Pat<(v2f64 (scalar_to_vector f64:$A)),
+ (v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
+
+def : Pat<(f64 (vector_extract v2f64:$S, 0)),
+ (f64 (EXTRACT_SUBREG $S, sub_64))>;
+def : Pat<(f64 (vector_extract v2f64:$S, 1)),
+ (f64 (EXTRACT_SUBREG (XXPERMDI $S, $S, 2), sub_64))>;
+
+// Additional fnmsub patterns: -a*c + b == -(a*c - b)
+def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+def : Pat<(fma f64:$A, (fneg f64:$C), f64:$B),
+ (XSNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v2f64:$A), v2f64:$C, v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+def : Pat<(fma v2f64:$A, (fneg v2f64:$C), v2f64:$B),
+ (XVNMSUBADP $B, $C, $A)>;
+
+def : Pat<(fma (fneg v4f32:$A), v4f32:$C, v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+def : Pat<(fma v4f32:$A, (fneg v4f32:$C), v4f32:$B),
+ (XVNMSUBASP $B, $C, $A)>;
+
+def : Pat<(v2f64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2f64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2i64 (bitconvert v4f32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v4i32:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v8i16:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+def : Pat<(v2i64 (bitconvert v16i8:$A)),
+ (COPY_TO_REGCLASS $A, VSRC)>;
+
+def : Pat<(v4f32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v4i32 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v8i16 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v16i8 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+def : Pat<(v2f64 (bitconvert v2i64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+def : Pat<(v2i64 (bitconvert v2f64:$A)),
+ (COPY_TO_REGCLASS $A, VRRC)>;
+
+// sign extension patterns
+// To extend "in place" from v2i32 to v2i64, we have input data like:
+// | undef | i32 | undef | i32 |
+// but xvcvsxwdp expects the input in big-Endian format:
+// | i32 | undef | i32 | undef |
+// so we need to shift everything to the left by one i32 (word) before
+// the conversion.
+def : Pat<(sext_inreg v2i64:$C, v2i32),
+ (XVCVDPSXDS (XVCVSXWDP (XXSLDWI $C, $C, 1)))>;
+def : Pat<(v2f64 (sint_to_fp (sext_inreg v2i64:$C, v2i32))),
+ (XVCVSXWDP (XXSLDWI $C, $C, 1))>;
+
+} // AddedComplexity
+} // HasVSX
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
new file mode 100644
index 000000000000..e5f113a0c030
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -0,0 +1,482 @@
+//===-- PPCJITInfo.cpp - Implement the JIT interfaces for the PowerPC -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the JIT interfaces for the 32-bit PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCJITInfo.h"
+#include "PPCRelocations.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "jit"
+
+static TargetJITInfo::JITCompilerFn JITCompilerFunction;
+
+PPCJITInfo::PPCJITInfo(PPCSubtarget &STI)
+ : Subtarget(STI), is64Bit(STI.isPPC64()) {
+ useGOT = 0;
+}
+
+#define BUILD_ADDIS(RD,RS,IMM16) \
+ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535))
+#define BUILD_ORI(RD,RS,UIMM16) \
+ ((24 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_ORIS(RD,RS,UIMM16) \
+ ((25 << 26) | ((RS) << 21) | ((RD) << 16) | ((UIMM16) & 65535))
+#define BUILD_RLDICR(RD,RS,SH,ME) \
+ ((30 << 26) | ((RS) << 21) | ((RD) << 16) | (((SH) & 31) << 11) | \
+ (((ME) & 63) << 6) | (1 << 2) | ((((SH) >> 5) & 1) << 1))
+#define BUILD_MTSPR(RS,SPR) \
+ ((31 << 26) | ((RS) << 21) | ((SPR) << 16) | (467 << 1))
+#define BUILD_BCCTRx(BO,BI,LINK) \
+ ((19 << 26) | ((BO) << 21) | ((BI) << 16) | (528 << 1) | ((LINK) & 1))
+#define BUILD_B(TARGET, LINK) \
+ ((18 << 26) | (((TARGET) & 0x00FFFFFF) << 2) | ((LINK) & 1))
+
+// Pseudo-ops
+#define BUILD_LIS(RD,IMM16) BUILD_ADDIS(RD,0,IMM16)
+#define BUILD_SLDI(RD,RS,IMM6) BUILD_RLDICR(RD,RS,IMM6,63-IMM6)
+#define BUILD_MTCTR(RS) BUILD_MTSPR(RS,9)
+#define BUILD_BCTR(LINK) BUILD_BCCTRx(20,0,LINK)
+
+static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
+ intptr_t Offset = ((intptr_t)To - (intptr_t)At) >> 2;
+ unsigned *AtI = (unsigned*)(intptr_t)At;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ AtI[0] = BUILD_B(Offset, isCall); // b/bl target
+ } else if (!is64Bit) {
+ AtI[0] = BUILD_LIS(12, To >> 16); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_MTCTR(12); // mtctr r12
+ AtI[3] = BUILD_BCTR(isCall); // bctr/bctrl
+ } else {
+ AtI[0] = BUILD_LIS(12, To >> 48); // lis r12, hi16(address)
+ AtI[1] = BUILD_ORI(12, 12, To >> 32); // ori r12, r12, lo16(address)
+ AtI[2] = BUILD_SLDI(12, 12, 32); // sldi r12, r12, 32
+ AtI[3] = BUILD_ORIS(12, 12, To >> 16); // oris r12, r12, hi16(address)
+ AtI[4] = BUILD_ORI(12, 12, To); // ori r12, r12, lo16(address)
+ AtI[5] = BUILD_MTCTR(12); // mtctr r12
+ AtI[6] = BUILD_BCTR(isCall); // bctr/bctrl
+ }
+}
+
+extern "C" void PPC32CompilationCallback();
+extern "C" void PPC64CompilationCallback();
+
+// The first clause of the preprocessor directive looks wrong, but it is
+// necessary when compiling this code on non-PowerPC hosts.
+#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__)
+void PPC32CompilationCallback() {
+ llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!");
+}
+#elif !defined(__ELF__)
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC32CompilationCallback\n"
+"_PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // PowerPC32 ABI linkage - 24 bytes
+ // parameters - 32 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 32 bytes
+ "mflr r0\n"
+ "stw r0, 8(r1)\n"
+ "stwu r1, -208(r1)\n"
+ // Save all int arg registers
+ "stw r10, 204(r1)\n" "stw r9, 200(r1)\n"
+ "stw r8, 196(r1)\n" "stw r7, 192(r1)\n"
+ "stw r6, 188(r1)\n" "stw r5, 184(r1)\n"
+ "stw r4, 180(r1)\n" "stw r3, 176(r1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd f13, 168(r1)\n" "stfd f12, 160(r1)\n"
+ "stfd f11, 152(r1)\n" "stfd f10, 144(r1)\n"
+ "stfd f9, 136(r1)\n" "stfd f8, 128(r1)\n"
+ "stfd f7, 120(r1)\n" "stfd f6, 112(r1)\n"
+ "stfd f5, 104(r1)\n" "stfd f4, 96(r1)\n"
+ "stfd f3, 88(r1)\n" "stfd f2, 80(r1)\n"
+ "stfd f1, 72(r1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr r3, r0\n"
+ "lwz r2, 208(r1)\n" // stub's frame
+ "lwz r4, 8(r2)\n" // stub's lr
+ "li r5, 0\n" // 0 == 32 bit
+ "bl _LLVMPPCCompilationCallback\n"
+ "mtctr r3\n"
+ // Restore all int arg registers
+ "lwz r10, 204(r1)\n" "lwz r9, 200(r1)\n"
+ "lwz r8, 196(r1)\n" "lwz r7, 192(r1)\n"
+ "lwz r6, 188(r1)\n" "lwz r5, 184(r1)\n"
+ "lwz r4, 180(r1)\n" "lwz r3, 176(r1)\n"
+ // Restore all FP arg registers
+ "lfd f13, 168(r1)\n" "lfd f12, 160(r1)\n"
+ "lfd f11, 152(r1)\n" "lfd f10, 144(r1)\n"
+ "lfd f9, 136(r1)\n" "lfd f8, 128(r1)\n"
+ "lfd f7, 120(r1)\n" "lfd f6, 112(r1)\n"
+ "lfd f5, 104(r1)\n" "lfd f4, 96(r1)\n"
+ "lfd f3, 88(r1)\n" "lfd f2, 80(r1)\n"
+ "lfd f1, 72(r1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz r1, 208(r1)\n"
+ "lwz r2, 8(r1)\n"
+ "mtlr r2\n"
+ "bctr\n"
+ );
+
+#else
+// ELF PPC 32 support
+
+// CompilationCallback stub - We can't use a C function with inline assembly in
+// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
+// write our own wrapper, which does things our way, so we have complete control
+// over register saving and restoring.
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC32CompilationCallback\n"
+"PPC32CompilationCallback:\n"
+ // Make space for 8 ints r[3-10] and 8 doubles f[1-8] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // FIXME: could shrink frame
+ // Set up a proper stack frame
+ // FIXME Layout
+ // 8 double registers - 64 bytes
+ // 8 int registers - 32 bytes
+ "mflr 0\n"
+ "stw 0, 4(1)\n"
+ "stwu 1, -104(1)\n"
+ // Save all int arg registers
+ "stw 10, 100(1)\n" "stw 9, 96(1)\n"
+ "stw 8, 92(1)\n" "stw 7, 88(1)\n"
+ "stw 6, 84(1)\n" "stw 5, 80(1)\n"
+ "stw 4, 76(1)\n" "stw 3, 72(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 8, 64(1)\n"
+ "stfd 7, 56(1)\n" "stfd 6, 48(1)\n"
+ "stfd 5, 40(1)\n" "stfd 4, 32(1)\n"
+ "stfd 3, 24(1)\n" "stfd 2, 16(1)\n"
+ "stfd 1, 8(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 0.
+ "mr 3, 0\n"
+ "lwz 5, 104(1)\n" // stub's frame
+ "lwz 4, 4(5)\n" // stub's lr
+ "li 5, 0\n" // 0 == 32 bit
+ "bl LLVMPPCCompilationCallback\n"
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "lwz 10, 100(1)\n" "lwz 9, 96(1)\n"
+ "lwz 8, 92(1)\n" "lwz 7, 88(1)\n"
+ "lwz 6, 84(1)\n" "lwz 5, 80(1)\n"
+ "lwz 4, 76(1)\n" "lwz 3, 72(1)\n"
+ // Restore all FP arg registers
+ "lfd 8, 64(1)\n"
+ "lfd 7, 56(1)\n" "lfd 6, 48(1)\n"
+ "lfd 5, 40(1)\n" "lfd 4, 32(1)\n"
+ "lfd 3, 24(1)\n" "lfd 2, 16(1)\n"
+ "lfd 1, 8(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "lwz 1, 104(1)\n"
+ "lwz 0, 4(1)\n"
+ "mtlr 0\n"
+ "bctr\n"
+ );
+#endif
+
+#if !defined(__powerpc64__) && !defined(__ppc64__)
+void PPC64CompilationCallback() {
+ llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!");
+}
+#else
+# ifdef __ELF__
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC64CompilationCallback\n"
+#if _CALL_ELF == 2
+ ".type PPC64CompilationCallback,@function\n"
+"PPC64CompilationCallback:\n"
+#else
+ ".section \".opd\",\"aw\",@progbits\n"
+ ".align 3\n"
+"PPC64CompilationCallback:\n"
+ ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+ ".size PPC64CompilationCallback,24\n"
+ ".previous\n"
+ ".align 4\n"
+ ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#endif
+# else
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl _PPC64CompilationCallback\n"
+"_PPC64CompilationCallback:\n"
+# endif
+ // Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
+ // FIXME: need to save v[0-19] for altivec?
+ // Set up a proper stack frame
+ // Layout
+ // PowerPC64 ABI linkage - 48 bytes
+ // parameters - 64 bytes
+ // 13 double registers - 104 bytes
+ // 8 int registers - 64 bytes
+ "mflr 0\n"
+ "std 0, 16(1)\n"
+ "stdu 1, -280(1)\n"
+ // Save all int arg registers
+ "std 10, 272(1)\n" "std 9, 264(1)\n"
+ "std 8, 256(1)\n" "std 7, 248(1)\n"
+ "std 6, 240(1)\n" "std 5, 232(1)\n"
+ "std 4, 224(1)\n" "std 3, 216(1)\n"
+ // Save all call-clobbered FP regs.
+ "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
+ "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
+ "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
+ "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
+ "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
+ "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
+ "stfd 1, 112(1)\n"
+ // Arguments to Compilation Callback:
+ // r3 - our lr (address of the call instruction in stub plus 4)
+ // r4 - stub's lr (address of instruction that called the stub plus 4)
+ // r5 - is64Bit - always 1.
+ "mr 3, 0\n" // return address (still in r0)
+ "ld 5, 280(1)\n" // stub's frame
+ "ld 4, 16(5)\n" // stub's lr
+ "li 5, 1\n" // 1 == 64 bit
+# ifdef __ELF__
+ "bl LLVMPPCCompilationCallback\n"
+ "nop\n"
+# else
+ "bl _LLVMPPCCompilationCallback\n"
+# endif
+ "mtctr 3\n"
+ // Restore all int arg registers
+ "ld 10, 272(1)\n" "ld 9, 264(1)\n"
+ "ld 8, 256(1)\n" "ld 7, 248(1)\n"
+ "ld 6, 240(1)\n" "ld 5, 232(1)\n"
+ "ld 4, 224(1)\n" "ld 3, 216(1)\n"
+ // Restore all FP arg registers
+ "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
+ "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
+ "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
+ "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
+ "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
+ "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
+ "lfd 1, 112(1)\n"
+ // Pop 3 frames off the stack and branch to target
+ "ld 1, 280(1)\n"
+ "ld 0, 16(1)\n"
+ "mtlr 0\n"
+ // XXX: any special TOC handling in the ELF case for JIT?
+ "bctr\n"
+ );
+#endif
+
+extern "C" {
+LLVM_LIBRARY_VISIBILITY void *
+LLVMPPCCompilationCallback(unsigned *StubCallAddrPlus4,
+ unsigned *OrigCallAddrPlus4,
+ bool is64Bit) {
+ // Adjust the pointer to the address of the call instruction in the stub
+ // emitted by emitFunctionStub, rather than the instruction after it.
+ unsigned *StubCallAddr = StubCallAddrPlus4 - 1;
+ unsigned *OrigCallAddr = OrigCallAddrPlus4 - 1;
+
+ void *Target = JITCompilerFunction(StubCallAddr);
+
+ // Check to see if *OrigCallAddr is a 'bl' instruction, and if we can rewrite
+ // it to branch directly to the destination. If so, rewrite it so it does not
+ // need to go through the stub anymore.
+ unsigned OrigCallInst = *OrigCallAddr;
+ if ((OrigCallInst >> 26) == 18) { // Direct call.
+ intptr_t Offset = ((intptr_t)Target - (intptr_t)OrigCallAddr) >> 2;
+
+ if (Offset >= -(1 << 23) && Offset < (1 << 23)) { // In range?
+ // Clear the original target out.
+ OrigCallInst &= (63 << 26) | 3;
+ // Fill in the new target.
+ OrigCallInst |= (Offset & ((1 << 24)-1)) << 2;
+ // Replace the call.
+ *OrigCallAddr = OrigCallInst;
+ }
+ }
+
+ // Assert that we are coming from a stub that was created with our
+ // emitFunctionStub.
+ if ((*StubCallAddr >> 26) == 18)
+ StubCallAddr -= 3;
+ else {
+ assert((*StubCallAddr >> 26) == 19 && "Call in stub is not indirect!");
+ StubCallAddr -= is64Bit ? 9 : 6;
+ }
+
+ // Rewrite the stub with an unconditional branch to the target, for any users
+ // who took the address of the stub.
+ EmitBranchToAt((intptr_t)StubCallAddr, (intptr_t)Target, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(StubCallAddr, 7*4);
+
+ // Put the address of the target function to call and the address to return to
+ // after calling the target function in a place that is easy to get on the
+ // stack after we restore all regs.
+ return Target;
+}
+}
+
+
+
+TargetJITInfo::LazyResolverFn
+PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) {
+ JITCompilerFunction = Fn;
+ return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback;
+}
+
+TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() {
+ // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3
+ // instructions to save the caller's address if this is a lazy-compilation
+ // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address
+ // into a register and jump through it.
+ StubLayout Result = {10*4, 4};
+ return Result;
+}
+
+#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
+defined(__APPLE__)
+extern "C" void sys_icache_invalidate(const void *Addr, size_t len);
+#endif
+
+void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) {
+ // If this is just a call to an external function, emit a branch instead of a
+ // call. The code is the same except for one bit of the last instruction.
+ if (Fn != (void*)(intptr_t)PPC32CompilationCallback &&
+ Fn != (void*)(intptr_t)PPC64CompilationCallback) {
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 7*4);
+ return Addr;
+ }
+
+ void *Addr = (void*)JCE.getCurrentPCValue();
+ if (is64Bit) {
+ JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0xf9610060); // std r11, 96(r1)
+ } else if (Subtarget.isDarwinABI()){
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610028); // stw r11, 40(r1)
+ } else {
+ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1)
+ JCE.emitWordBE(0x7d6802a6); // mflr r11
+ JCE.emitWordBE(0x91610024); // stw r11, 36(r1)
+ }
+ intptr_t BranchAddr = (intptr_t)JCE.getCurrentPCValue();
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ JCE.emitWordBE(0);
+ EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Addr, 10*4);
+ return Addr;
+}
+
+
+void PPCJITInfo::relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) {
+ for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
+ unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
+ intptr_t ResultPtr = (intptr_t)MR->getResultPointer();
+ switch ((PPC::RelocationType)MR->getRelocationType()) {
+ default: llvm_unreachable("Unknown relocation type!");
+ case PPC::reloc_pcrel_bx:
+ // PC-relative relocation for b and bl instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 23) && ResultPtr < (1 << 23) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 24)-1)) << 2;
+ break;
+ case PPC::reloc_pcrel_bcx:
+ // PC-relative relocation for BLT,BLE,BEQ,BGE,BGT,BNE, or other
+ // bcx instructions.
+ ResultPtr = (ResultPtr-(intptr_t)RelocPos) >> 2;
+ assert(ResultPtr >= -(1 << 13) && ResultPtr < (1 << 13) &&
+ "Relocation out of range!");
+ *RelocPos |= (ResultPtr & ((1 << 14)-1)) << 2;
+ break;
+ case PPC::reloc_absolute_high: // high bits of ref -> low 16 of instr
+ case PPC::reloc_absolute_low: { // low bits of ref -> low 16 of instr
+ ResultPtr += MR->getConstantVal();
+
+ // If this is a high-part access, get the high-part.
+ if (MR->getRelocationType() == PPC::reloc_absolute_high) {
+ // If the low part will have a carry (really a borrow) from the low
+ // 16-bits into the high 16, add a bit to borrow from.
+ if (((int)ResultPtr << 16) < 0)
+ ResultPtr += 1 << 16;
+ ResultPtr >>= 16;
+ }
+
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 65535;
+ unsigned HighBits = *RelocPos & ~65535;
+ *RelocPos = LowBits | HighBits; // Slam into low 16-bits
+ break;
+ }
+ case PPC::reloc_absolute_low_ix: { // low bits of ref -> low 14 of instr
+ ResultPtr += MR->getConstantVal();
+ // Do the addition then mask, so the addition does not overflow the 16-bit
+ // immediate section of the instruction.
+ unsigned LowBits = (*RelocPos + ResultPtr) & 0xFFFC;
+ unsigned HighBits = *RelocPos & 0xFFFF0003;
+ *RelocPos = LowBits | HighBits; // Slam into low 14-bits.
+ break;
+ }
+ }
+ }
+}
+
+void PPCJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
+ EmitBranchToAt((intptr_t)Old, (intptr_t)New, false, is64Bit);
+ sys::Memory::InvalidateInstructionCache(Old, 7*4);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
new file mode 100644
index 000000000000..b6b37ffb852b
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.h
@@ -0,0 +1,46 @@
+//===-- PPCJITInfo.h - PowerPC impl. of the JIT interface -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetJITInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC_JITINFO_H
+#define POWERPC_JITINFO_H
+
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/Target/TargetJITInfo.h"
+
+namespace llvm {
+class PPCSubtarget;
+class PPCJITInfo : public TargetJITInfo {
+protected:
+ PPCSubtarget &Subtarget;
+ bool is64Bit;
+
+public:
+ PPCJITInfo(PPCSubtarget &STI);
+
+ StubLayout getStubLayout() override;
+ void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) override;
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
+ void relocate(void *Function, MachineRelocation *MR, unsigned NumRelocs,
+ unsigned char *GOTBase) override;
+
+ /// replaceMachineCodeForFunction - Make it so that calling the function
+ /// whose machine code is at OLD turns into a call to NEW, perhaps by
+ /// overwriting OLD with a branch to NEW. This is used for self-modifying
+ /// code.
+ ///
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
new file mode 100644
index 000000000000..668041371780
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -0,0 +1,216 @@
+//===-- PPCMCInstLower.cpp - Convert PPC MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower PPC MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCSubtarget.h"
+#include "MCTargetDesc/PPCMCExpr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
+ return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
+}
+
+
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+ const TargetMachine &TM = AP.TM;
+ Mangler *Mang = AP.Mang;
+ const DataLayout *DL = TM.getDataLayout();
+ MCContext &Ctx = AP.OutContext;
+ bool isDarwin = TM.getSubtarget<PPCSubtarget>().isDarwin();
+
+ SmallString<128> Name;
+ StringRef Suffix;
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB) {
+ if (isDarwin)
+ Suffix = "$stub";
+ } else if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG)
+ Suffix = "$non_lazy_ptr";
+
+ if (!Suffix.empty())
+ Name += DL->getPrivateGlobalPrefix();
+
+ unsigned PrefixLen = Name.size();
+
+ if (!MO.isGlobal()) {
+ assert(MO.isSymbol() && "Isn't a symbol reference");
+ Mang->getNameWithPrefix(Name, MO.getSymbolName());
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ TM.getNameWithPrefix(Name, GV, *Mang);
+ }
+
+ unsigned OrigLen = Name.size() - PrefixLen;
+
+ Name += Suffix;
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
+ StringRef OrigName = StringRef(Name).substr(PrefixLen, OrigLen);
+
+ // If the target flags on the operand changes the name of the symbol, do that
+ // before we return the symbol.
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && isDarwin) {
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ getMachOMMI(AP).getFnStubEntry(Sym);
+ if (StubSym.getPointer())
+ return Sym;
+
+ if (MO.isGlobal()) {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ } else {
+ StubSym =
+ MachineModuleInfoImpl::
+ StubValueTy(Ctx.GetOrCreateSymbol(OrigName), false);
+ }
+ return Sym;
+ }
+
+ // If the symbol reference is actually to a non_lazy_ptr, not to the symbol,
+ // then add the suffix.
+ if (MO.getTargetFlags() & PPCII::MO_NLP_FLAG) {
+ MachineModuleInfoMachO &MachO = getMachOMMI(AP);
+
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ (MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
+ MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
+
+ if (!StubSym.getPointer()) {
+ assert(MO.isGlobal() && "Extern symbol not handled yet");
+ StubSym = MachineModuleInfoImpl::
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
+ !MO.getGlobal()->hasInternalLinkage());
+ }
+ return Sym;
+ }
+
+ return Sym;
+}
+
+static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
+ AsmPrinter &Printer, bool isDarwin) {
+ MCContext &Ctx = Printer.OutContext;
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+
+ unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
+
+ switch (access) {
+ case PPCII::MO_TPREL_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_TPREL_LO;
+ break;
+ case PPCII::MO_TPREL_HA:
+ RefKind = MCSymbolRefExpr::VK_PPC_TPREL_HA;
+ break;
+ case PPCII::MO_DTPREL_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_DTPREL_LO;
+ break;
+ case PPCII::MO_TLSLD_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO;
+ break;
+ case PPCII::MO_TOC_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO;
+ break;
+ case PPCII::MO_TLS:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+ break;
+ }
+
+ if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)
+ RefKind = MCSymbolRefExpr::VK_PLT;
+
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(MO.getOffset(), Ctx),
+ Ctx);
+
+ // Subtract off the PIC base if required.
+ if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+
+ const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
+ Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
+ }
+
+ // Add ha16() / lo16() markers if required.
+ switch (access) {
+ case PPCII::MO_LO:
+ Expr = PPCMCExpr::CreateLo(Expr, isDarwin, Ctx);
+ break;
+ case PPCII::MO_HA:
+ Expr = PPCMCExpr::CreateHa(Expr, isDarwin, Ctx);
+ break;
+ }
+
+ return MCOperand::CreateExpr(Expr);
+}
+
+void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ AsmPrinter &AP, bool isDarwin) {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), AP.OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO, GetSymbolFromOperand(MO, AP), AP, isDarwin);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP, isDarwin);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP,
+ isDarwin);
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..9da1b1b5c754
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -0,0 +1,23 @@
+//===-- PPCMachineFunctionInfo.cpp - Private data used for PowerPC --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMachineFunctionInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+void PPCFunctionInfo::anchor() { }
+
+MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
+ const DataLayout *DL = MF.getTarget().getDataLayout();
+ return MF.getContext().GetOrCreateSymbol(Twine(DL->getPrivateGlobalPrefix())+
+ Twine(MF.getFunctionNumber())+"$poff");
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
new file mode 100644
index 000000000000..9a2cec744274
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -0,0 +1,191 @@
+//===-- PPCMachineFunctionInfo.h - Private data used for PowerPC --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_MACHINE_FUNCTION_INFO_H
+#define PPC_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// PPCFunctionInfo - This class is derived from MachineFunction private
+/// PowerPC target-specific information for each MachineFunction.
+class PPCFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// FramePointerSaveIndex - Frame index of where the old frame pointer is
+ /// stored. Also used as an anchor for instructions that need to be altered
+ /// when using frame pointers (dyna_add, dyna_sub.)
+ int FramePointerSaveIndex;
+
+ /// ReturnAddrSaveIndex - Frame index of where the return address is stored.
+ ///
+ int ReturnAddrSaveIndex;
+
+ /// Frame index where the old base pointer is stored.
+ int BasePointerSaveIndex;
+
+ /// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
+ /// function. This is only valid after the initial scan of the function by
+ /// PEI.
+ bool MustSaveLR;
+
+ /// Does this function have any stack spills.
+ bool HasSpills;
+
+ /// Does this function spill using instructions with only r+r (not r+i)
+ /// forms.
+ bool HasNonRISpills;
+
+ /// SpillsCR - Indicates whether CR is spilled in the current function.
+ bool SpillsCR;
+
+ /// Indicates whether VRSAVE is spilled in the current function.
+ bool SpillsVRSAVE;
+
+ /// LRStoreRequired - The bool indicates whether there is some explicit use of
+ /// the LR/LR8 stack slot that is not obvious from scanning the code. This
+ /// requires that the code generator produce a store of LR to the stack on
+ /// entry, even though LR may otherwise apparently not be used.
+ bool LRStoreRequired;
+
+ /// MinReservedArea - This is the frame size that is at least reserved in a
+ /// potential caller (parameter+linkage area).
+ unsigned MinReservedArea;
+
+ /// TailCallSPDelta - Stack pointer delta used when tail calling. Maximum
+ /// amount the stack pointer is adjusted to make the frame bigger for tail
+ /// calls. Used for creating an area before the register spill area.
+ int TailCallSPDelta;
+
+ /// HasFastCall - Does this function contain a fast call. Used to determine
+ /// how the caller's stack pointer should be calculated (epilog/dynamicalloc).
+ bool HasFastCall;
+
+ /// VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+ /// VarArgsStackOffset - StackOffset for start of stack
+ /// arguments.
+ int VarArgsStackOffset;
+ /// VarArgsNumGPR - Index of the first unused integer
+ /// register for parameter passing.
+ unsigned VarArgsNumGPR;
+ /// VarArgsNumFPR - Index of the first unused double
+ /// register for parameter passing.
+ unsigned VarArgsNumFPR;
+
+ /// CRSpillFrameIndex - FrameIndex for CR spill slot for 32-bit SVR4.
+ int CRSpillFrameIndex;
+
+ /// If any of CR[2-4] need to be saved in the prologue and restored in the
+ /// epilogue then they are added to this array. This is used for the
+ /// 64-bit SVR4 ABI.
+ SmallVector<unsigned, 3> MustSaveCRs;
+
+ /// Hold onto our MachineFunction context.
+ MachineFunction &MF;
+
+ /// Whether this uses the PIC Base register or not.
+ bool UsesPICBase;
+
+public:
+ explicit PPCFunctionInfo(MachineFunction &MF)
+ : FramePointerSaveIndex(0),
+ ReturnAddrSaveIndex(0),
+ BasePointerSaveIndex(0),
+ HasSpills(false),
+ HasNonRISpills(false),
+ SpillsCR(false),
+ SpillsVRSAVE(false),
+ LRStoreRequired(false),
+ MinReservedArea(0),
+ TailCallSPDelta(0),
+ HasFastCall(false),
+ VarArgsFrameIndex(0),
+ VarArgsStackOffset(0),
+ VarArgsNumGPR(0),
+ VarArgsNumFPR(0),
+ CRSpillFrameIndex(0),
+ MF(MF),
+ UsesPICBase(0) {}
+
+ int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
+ void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
+
+ int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
+ void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+
+ int getBasePointerSaveIndex() const { return BasePointerSaveIndex; }
+ void setBasePointerSaveIndex(int Idx) { BasePointerSaveIndex = Idx; }
+
+ unsigned getMinReservedArea() const { return MinReservedArea; }
+ void setMinReservedArea(unsigned size) { MinReservedArea = size; }
+
+ int getTailCallSPDelta() const { return TailCallSPDelta; }
+ void setTailCallSPDelta(int size) { TailCallSPDelta = size; }
+
+ /// MustSaveLR - This is set when the prolog/epilog inserter does its initial
+ /// scan of the function. It is true if the LR/LR8 register is ever explicitly
+ /// defined/clobbered in the machine function (e.g. by calls and movpctolr,
+ /// which is used in PIC generation), or if the LR stack slot is explicitly
+ /// referenced by builtin_return_address.
+ void setMustSaveLR(bool U) { MustSaveLR = U; }
+ bool mustSaveLR() const { return MustSaveLR; }
+
+ void setHasSpills() { HasSpills = true; }
+ bool hasSpills() const { return HasSpills; }
+
+ void setHasNonRISpills() { HasNonRISpills = true; }
+ bool hasNonRISpills() const { return HasNonRISpills; }
+
+ void setSpillsCR() { SpillsCR = true; }
+ bool isCRSpilled() const { return SpillsCR; }
+
+ void setSpillsVRSAVE() { SpillsVRSAVE = true; }
+ bool isVRSAVESpilled() const { return SpillsVRSAVE; }
+
+ void setLRStoreRequired() { LRStoreRequired = true; }
+ bool isLRStoreRequired() const { return LRStoreRequired; }
+
+ void setHasFastCall() { HasFastCall = true; }
+ bool hasFastCall() const { return HasFastCall;}
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ int getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(int Offset) { VarArgsStackOffset = Offset; }
+
+ unsigned getVarArgsNumGPR() const { return VarArgsNumGPR; }
+ void setVarArgsNumGPR(unsigned Num) { VarArgsNumGPR = Num; }
+
+ unsigned getVarArgsNumFPR() const { return VarArgsNumFPR; }
+ void setVarArgsNumFPR(unsigned Num) { VarArgsNumFPR = Num; }
+
+ int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
+ void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
+
+ const SmallVectorImpl<unsigned> &
+ getMustSaveCRs() const { return MustSaveCRs; }
+ void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
+
+ void setUsesPICBase(bool uses) { UsesPICBase = uses; }
+ bool usesPICBase() const { return UsesPICBase; }
+
+ MCSymbol *getPICOffsetSymbol() const;
+};
+
+} // end of namespace llvm
+
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
new file mode 100644
index 000000000000..17b836d1ed97
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCPerfectShuffle.h
@@ -0,0 +1,6586 @@
+//===-- PPCPerfectShuffle.h - Altivec Perfect Shuffle Table -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file, which was autogenerated by llvm-PerfectShuffle, contains data
+// for the optimal way to build a perfect shuffle without using vperm.
+//
+//===----------------------------------------------------------------------===//
+
+// 31 entries have cost 0
+// 292 entries have cost 1
+// 1384 entries have cost 2
+// 3061 entries have cost 3
+// 1733 entries have cost 4
+// 60 entries have cost 5
+
+// This table is 6561*4 = 26244 bytes in size.
+static const unsigned PerfectShuffleTable[6561+1] = {
+ 202162278U, // <0,0,0,0>: Cost 1 vspltisw0 LHS
+ 1140850790U, // <0,0,0,1>: Cost 2 vmrghw <0,0,0,0>, LHS
+ 2617247181U, // <0,0,0,2>: Cost 3 vsldoi4 <0,0,0,0>, <2,0,3,0>
+ 2635163787U, // <0,0,0,3>: Cost 3 vsldoi4 <3,0,0,0>, <3,0,0,0>
+ 1543507254U, // <0,0,0,4>: Cost 2 vsldoi4 <0,0,0,0>, RHS
+ 2281701705U, // <0,0,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,0,5>
+ 2617250133U, // <0,0,0,6>: Cost 3 vsldoi4 <0,0,0,0>, <6,0,7,0>
+ 2659054575U, // <0,0,0,7>: Cost 3 vsldoi4 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,0,u>: Cost 1 vspltisw0 LHS
+ 1141686282U, // <0,0,1,0>: Cost 2 vmrghw LHS, <0,0,1,1>
+ 67944550U, // <0,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 1685241958U, // <0,0,1,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2215870716U, // <0,0,1,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1141727570U, // <0,0,1,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 2215428562U, // <0,0,1,5>: Cost 3 vmrghw LHS, <0,5,6,7>
+ 2215428589U, // <0,0,1,6>: Cost 3 vmrghw LHS, <0,6,0,7>
+ 2659062768U, // <0,0,1,7>: Cost 3 vsldoi4 <7,0,0,1>, <7,0,0,1>
+ 67945117U, // <0,0,1,u>: Cost 1 vmrghw LHS, LHS
+ 2684356045U, // <0,0,2,0>: Cost 3 vsldoi8 <0,0,0,0>, <2,0,3,0>
+ 2216009830U, // <0,0,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2216009901U, // <0,0,2,2>: Cost 3 vmrghw <0,2,1,2>, <0,2,1,2>
+ 2698290853U, // <0,0,2,3>: Cost 3 vsldoi8 <2,3,0,0>, <2,3,0,0>
+ 3289751890U, // <0,0,2,4>: Cost 4 vmrghw <0,2,1,2>, <0,4,1,5>
+ 3758098275U, // <0,0,2,5>: Cost 4 vsldoi8 <0,0,0,0>, <2,5,3,1>
+ 2684356538U, // <0,0,2,6>: Cost 3 vsldoi8 <0,0,0,0>, <2,6,3,7>
+ 3758098410U, // <0,0,2,7>: Cost 4 vsldoi8 <0,0,0,0>, <2,7,0,1>
+ 2216010397U, // <0,0,2,u>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2702272651U, // <0,0,3,0>: Cost 3 vsldoi8 <3,0,0,0>, <3,0,0,0>
+ 2216656998U, // <0,0,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 3844669704U, // <0,0,3,2>: Cost 4 vsldoi12 <3,2,3,0>, <0,3,2,3>
+ 2216657148U, // <0,0,3,3>: Cost 3 vmrghw <0,3,1,0>, <0,3,1,0>
+ 2684357122U, // <0,0,3,4>: Cost 3 vsldoi8 <0,0,0,0>, <3,4,5,6>
+ 3732820066U, // <0,0,3,5>: Cost 4 vsldoi4 <7,0,0,3>, <5,6,7,0>
+ 3778005624U, // <0,0,3,6>: Cost 4 vsldoi8 <3,3,0,0>, <3,6,0,7>
+ 3374713464U, // <0,0,3,7>: Cost 4 vmrglw <3,2,0,3>, <3,6,0,7>
+ 2216657565U, // <0,0,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217361408U, // <0,0,4,0>: Cost 3 vmrghw <0,4,1,5>, <0,0,0,0>
+ 1143619686U, // <0,0,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 3291103405U, // <0,0,4,2>: Cost 4 vmrghw <0,4,1,5>, <0,2,1,2>
+ 3827269988U, // <0,0,4,3>: Cost 4 vsldoi12 <0,3,1,0>, <0,4,3,5>
+ 1143619922U, // <0,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1610616118U, // <0,0,4,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 3758099833U, // <0,0,4,6>: Cost 4 vsldoi8 <0,0,0,0>, <4,6,5,2>
+ 3854107016U, // <0,0,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <0,4,7,5>
+ 1143620253U, // <0,0,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2284396544U, // <0,0,5,0>: Cost 3 vmrglw <0,4,0,5>, <0,0,0,0>
+ 2218025062U, // <0,0,5,1>: Cost 3 vmrghw <0,5,1,5>, LHS
+ 3758100203U, // <0,0,5,2>: Cost 4 vsldoi8 <0,0,0,0>, <5,2,1,3>
+ 3395966100U, // <0,0,5,3>: Cost 4 vmrglw <6,7,0,5>, <7,2,0,3>
+ 3804549052U, // <0,0,5,4>: Cost 4 vsldoi8 <7,7,0,0>, <5,4,6,5>
+ 2302314964U, // <0,0,5,5>: Cost 3 vmrglw <3,4,0,5>, <3,4,0,5>
+ 2785821138U, // <0,0,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 3395966428U, // <0,0,5,7>: Cost 4 vmrglw <6,7,0,5>, <7,6,0,7>
+ 2787148260U, // <0,0,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <0,5,u,7>
+ 2684358997U, // <0,0,6,0>: Cost 3 vsldoi8 <0,0,0,0>, <6,0,7,0>
+ 2218631270U, // <0,0,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2684359162U, // <0,0,6,2>: Cost 3 vsldoi8 <0,0,0,0>, <6,2,7,3>
+ 3758101042U, // <0,0,6,3>: Cost 4 vsldoi8 <0,0,0,0>, <6,3,4,5>
+ 3732843830U, // <0,0,6,4>: Cost 4 vsldoi4 <7,0,0,6>, RHS
+ 3758101227U, // <0,0,6,5>: Cost 4 vsldoi8 <0,0,0,0>, <6,5,7,1>
+ 2684359480U, // <0,0,6,6>: Cost 3 vsldoi8 <0,0,0,0>, <6,6,6,6>
+ 2724836173U, // <0,0,6,7>: Cost 3 vsldoi8 <6,7,0,0>, <6,7,0,0>
+ 2725499806U, // <0,0,6,u>: Cost 3 vsldoi8 <6,u,0,0>, <6,u,0,0>
+ 2726163439U, // <0,0,7,0>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 2219311206U, // <0,0,7,1>: Cost 3 vmrghw <0,7,1,0>, LHS
+ 3868557900U, // <0,0,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <0,7,2,3>
+ 3377400112U, // <0,0,7,3>: Cost 4 vmrglw <3,6,0,7>, <3,2,0,3>
+ 2684360038U, // <0,0,7,4>: Cost 3 vsldoi8 <0,0,0,0>, <7,4,5,6>
+ 3732852834U, // <0,0,7,5>: Cost 4 vsldoi4 <7,0,0,7>, <5,6,7,0>
+ 3871507060U, // <0,0,7,6>: Cost 4 vsldoi12 <7,6,7,0>, <0,7,6,7>
+ 2303658616U, // <0,0,7,7>: Cost 3 vmrglw <3,6,0,7>, <3,6,0,7>
+ 2726163439U, // <0,0,7,u>: Cost 3 vsldoi8 <7,0,0,0>, <7,0,0,0>
+ 202162278U, // <0,0,u,0>: Cost 1 vspltisw0 LHS
+ 72589414U, // <0,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 1685242525U, // <0,0,u,2>: Cost 2 vsldoi12 <1,2,3,0>, LHS
+ 2220073212U, // <0,0,u,3>: Cost 3 vmrghw LHS, <0,3,1,0>
+ 1146331474U, // <0,0,u,4>: Cost 2 vmrghw LHS, <0,4,1,5>
+ 1610619034U, // <0,0,u,5>: Cost 2 vsldoi8 <0,0,0,0>, RHS
+ 2785821138U, // <0,0,u,6>: Cost 3 vsldoi12 <5,6,7,0>, <0,5,6,7>
+ 2659120119U, // <0,0,u,7>: Cost 3 vsldoi4 <7,0,0,u>, <7,0,0,u>
+ 72589981U, // <0,0,u,u>: Cost 1 vmrghw LHS, LHS
+ 2698297344U, // <0,1,0,0>: Cost 3 vsldoi8 <2,3,0,1>, <0,0,0,0>
+ 1624555622U, // <0,1,0,1>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 2758984428U, // <0,1,0,2>: Cost 3 vsldoi12 <1,2,3,0>, <1,0,2,1>
+ 2635237524U, // <0,1,0,3>: Cost 3 vsldoi4 <3,0,1,0>, <3,0,1,0>
+ 2693652818U, // <0,1,0,4>: Cost 3 vsldoi8 <1,5,0,1>, <0,4,1,5>
+ 2281701714U, // <0,1,0,5>: Cost 3 vmrglw <0,0,0,0>, <0,4,1,5>
+ 2698297846U, // <0,1,0,6>: Cost 3 vsldoi8 <2,3,0,1>, <0,6,1,7>
+ 2659128312U, // <0,1,0,7>: Cost 3 vsldoi4 <7,0,1,0>, <7,0,1,0>
+ 1624556189U, // <0,1,0,u>: Cost 2 vsldoi8 <2,3,0,1>, LHS
+ 1543585802U, // <0,1,1,0>: Cost 2 vsldoi4 <0,0,1,1>, <0,0,1,1>
+ 1141728052U, // <0,1,1,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1141728150U, // <0,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2295644334U, // <0,1,1,3>: Cost 3 vmrglw <2,3,0,1>, <0,2,1,3>
+ 1543589174U, // <0,1,1,4>: Cost 2 vsldoi4 <0,0,1,1>, RHS
+ 2290999634U, // <0,1,1,5>: Cost 3 vmrglw <1,5,0,1>, <0,4,1,5>
+ 2617332135U, // <0,1,1,6>: Cost 3 vsldoi4 <0,0,1,1>, <6,1,7,1>
+ 2617332720U, // <0,1,1,7>: Cost 3 vsldoi4 <0,0,1,1>, <7,0,0,1>
+ 1142171004U, // <0,1,1,u>: Cost 2 vmrghw LHS, <1,u,3,0>
+ 1561509990U, // <0,1,2,0>: Cost 2 vsldoi4 <3,0,1,2>, LHS
+ 2623308516U, // <0,1,2,1>: Cost 3 vsldoi4 <1,0,1,2>, <1,0,1,2>
+ 2698298984U, // <0,1,2,2>: Cost 3 vsldoi8 <2,3,0,1>, <2,2,2,2>
+ 835584U, // <0,1,2,3>: Cost 0 copy LHS
+ 1561513270U, // <0,1,2,4>: Cost 2 vsldoi4 <3,0,1,2>, RHS
+ 2647199304U, // <0,1,2,5>: Cost 3 vsldoi4 <5,0,1,2>, <5,0,1,2>
+ 2698299322U, // <0,1,2,6>: Cost 3 vsldoi8 <2,3,0,1>, <2,6,3,7>
+ 1585402874U, // <0,1,2,7>: Cost 2 vsldoi4 <7,0,1,2>, <7,0,1,2>
+ 835584U, // <0,1,2,u>: Cost 0 copy LHS
+ 2698299540U, // <0,1,3,0>: Cost 3 vsldoi8 <2,3,0,1>, <3,0,1,0>
+ 3290399540U, // <0,1,3,1>: Cost 4 vmrghw <0,3,1,0>, <1,1,1,1>
+ 2698299720U, // <0,1,3,2>: Cost 3 vsldoi8 <2,3,0,1>, <3,2,3,0>
+ 2698299804U, // <0,1,3,3>: Cost 3 vsldoi8 <2,3,0,1>, <3,3,3,3>
+ 2698299906U, // <0,1,3,4>: Cost 3 vsldoi8 <2,3,0,1>, <3,4,5,6>
+ 3832726521U, // <0,1,3,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,3,5,0>
+ 2724842160U, // <0,1,3,6>: Cost 3 vsldoi8 <6,7,0,1>, <3,6,7,0>
+ 2706926275U, // <0,1,3,7>: Cost 3 vsldoi8 <3,7,0,1>, <3,7,0,1>
+ 2698300190U, // <0,1,3,u>: Cost 3 vsldoi8 <2,3,0,1>, <3,u,1,2>
+ 2635268198U, // <0,1,4,0>: Cost 3 vsldoi4 <3,0,1,4>, LHS
+ 2217362228U, // <0,1,4,1>: Cost 3 vmrghw <0,4,1,5>, <1,1,1,1>
+ 2217362326U, // <0,1,4,2>: Cost 3 vmrghw <0,4,1,5>, <1,2,3,0>
+ 2635270296U, // <0,1,4,3>: Cost 3 vsldoi4 <3,0,1,4>, <3,0,1,4>
+ 2635271478U, // <0,1,4,4>: Cost 3 vsldoi4 <3,0,1,4>, RHS
+ 1624558902U, // <0,1,4,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2659160910U, // <0,1,4,6>: Cost 3 vsldoi4 <7,0,1,4>, <6,7,0,1>
+ 2659161084U, // <0,1,4,7>: Cost 3 vsldoi4 <7,0,1,4>, <7,0,1,4>
+ 1624559145U, // <0,1,4,u>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 3832726639U, // <0,1,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,0,1>
+ 2714889871U, // <0,1,5,1>: Cost 3 vsldoi8 <5,1,0,1>, <5,1,0,1>
+ 2302314646U, // <0,1,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 3834717321U, // <0,1,5,3>: Cost 4 vsldoi12 <1,5,3,0>, <1,5,3,0>
+ 3832726679U, // <0,1,5,4>: Cost 4 vsldoi12 <1,2,3,0>, <1,5,4,5>
+ 2717544403U, // <0,1,5,5>: Cost 3 vsldoi8 <5,5,0,1>, <5,5,0,1>
+ 2718208036U, // <0,1,5,6>: Cost 3 vsldoi8 <5,6,0,1>, <5,6,0,1>
+ 3792613493U, // <0,1,5,7>: Cost 4 vsldoi8 <5,7,0,1>, <5,7,0,1>
+ 2719535302U, // <0,1,5,u>: Cost 3 vsldoi8 <5,u,0,1>, <5,u,0,1>
+ 2659172454U, // <0,1,6,0>: Cost 3 vsldoi4 <7,0,1,6>, LHS
+ 3832726735U, // <0,1,6,1>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,1,7>
+ 2724844026U, // <0,1,6,2>: Cost 3 vsldoi8 <6,7,0,1>, <6,2,7,3>
+ 3775361608U, // <0,1,6,3>: Cost 4 vsldoi8 <2,u,0,1>, <6,3,7,0>
+ 2659175734U, // <0,1,6,4>: Cost 3 vsldoi4 <7,0,1,6>, RHS
+ 3832726771U, // <0,1,6,5>: Cost 4 vsldoi12 <1,2,3,0>, <1,6,5,7>
+ 2724844344U, // <0,1,6,6>: Cost 3 vsldoi8 <6,7,0,1>, <6,6,6,6>
+ 1651102542U, // <0,1,6,7>: Cost 2 vsldoi8 <6,7,0,1>, <6,7,0,1>
+ 1651766175U, // <0,1,6,u>: Cost 2 vsldoi8 <6,u,0,1>, <6,u,0,1>
+ 2724844536U, // <0,1,7,0>: Cost 3 vsldoi8 <6,7,0,1>, <7,0,1,0>
+ 3377397770U, // <0,1,7,1>: Cost 4 vmrglw <3,6,0,7>, <0,0,1,1>
+ 2698302636U, // <0,1,7,2>: Cost 3 vsldoi8 <2,3,0,1>, <7,2,3,0>
+ 2728162531U, // <0,1,7,3>: Cost 3 vsldoi8 <7,3,0,1>, <7,3,0,1>
+ 2724844902U, // <0,1,7,4>: Cost 3 vsldoi8 <6,7,0,1>, <7,4,5,6>
+ 3377398098U, // <0,1,7,5>: Cost 4 vmrglw <3,6,0,7>, <0,4,1,5>
+ 2724845076U, // <0,1,7,6>: Cost 3 vsldoi8 <6,7,0,1>, <7,6,7,0>
+ 2724845164U, // <0,1,7,7>: Cost 3 vsldoi8 <6,7,0,1>, <7,7,7,7>
+ 2724845186U, // <0,1,7,u>: Cost 3 vsldoi8 <6,7,0,1>, <7,u,1,2>
+ 1561559142U, // <0,1,u,0>: Cost 2 vsldoi4 <3,0,1,u>, LHS
+ 1146331956U, // <0,1,u,1>: Cost 2 vmrghw LHS, <1,1,1,1>
+ 1146332054U, // <0,1,u,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 835584U, // <0,1,u,3>: Cost 0 copy LHS
+ 1561562422U, // <0,1,u,4>: Cost 2 vsldoi4 <3,0,1,u>, RHS
+ 1624561818U, // <0,1,u,5>: Cost 2 vsldoi8 <2,3,0,1>, RHS
+ 2220074191U, // <0,1,u,6>: Cost 3 vmrghw LHS, <1,6,1,7>
+ 1585452032U, // <0,1,u,7>: Cost 2 vsldoi4 <7,0,1,u>, <7,0,1,u>
+ 835584U, // <0,1,u,u>: Cost 0 copy LHS
+ 2214593997U, // <0,2,0,0>: Cost 3 vmrghw <0,0,0,0>, <2,0,3,0>
+ 2214675999U, // <0,2,0,1>: Cost 3 vmrghw <0,0,1,1>, <2,1,3,1>
+ 2214594152U, // <0,2,0,2>: Cost 3 vmrghw <0,0,0,0>, <2,2,2,2>
+ 1207959654U, // <0,2,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 3709054262U, // <0,2,0,4>: Cost 4 vsldoi4 <3,0,2,0>, RHS
+ 3375350836U, // <0,2,0,5>: Cost 4 vmrglw <3,3,0,0>, <1,4,2,5>
+ 2214594490U, // <0,2,0,6>: Cost 3 vmrghw <0,0,0,0>, <2,6,3,7>
+ 3288336362U, // <0,2,0,7>: Cost 4 vmrghw <0,0,0,0>, <2,7,0,1>
+ 1207959659U, // <0,2,0,u>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 2215871994U, // <0,2,1,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2215470623U, // <0,2,1,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1141728872U, // <0,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1141728934U, // <0,2,1,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2215872323U, // <0,2,1,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2215872405U, // <0,2,1,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1141729210U, // <0,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2215430122U, // <0,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1141729368U, // <0,2,1,u>: Cost 2 vmrghw LHS, <2,u,3,3>
+ 3289736698U, // <0,2,2,0>: Cost 4 vmrghw <0,2,1,0>, <2,0,u,0>
+ 3289744927U, // <0,2,2,1>: Cost 4 vmrghw <0,2,1,1>, <2,1,3,1>
+ 2216011368U, // <0,2,2,2>: Cost 3 vmrghw <0,2,1,2>, <2,2,2,2>
+ 2216019622U, // <0,2,2,3>: Cost 3 vmrghw <0,2,1,3>, <2,3,0,1>
+ 3289769795U, // <0,2,2,4>: Cost 4 vmrghw <0,2,1,4>, <2,4,u,5>
+ 3289778069U, // <0,2,2,5>: Cost 4 vmrghw <0,2,1,5>, <2,5,u,6>
+ 2216044474U, // <0,2,2,6>: Cost 3 vmrghw <0,2,1,6>, <2,6,3,7>
+ 3732960259U, // <0,2,2,7>: Cost 4 vsldoi4 <7,0,2,2>, <7,0,2,2>
+ 2216061016U, // <0,2,2,u>: Cost 3 vmrghw <0,2,1,u>, <2,u,3,3>
+ 2758985382U, // <0,2,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,0,1>
+ 2758985392U, // <0,2,3,1>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,1,2>
+ 3290400360U, // <0,2,3,2>: Cost 4 vmrghw <0,3,1,0>, <2,2,2,2>
+ 2758985408U, // <0,2,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,3,0>
+ 2758985422U, // <0,2,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,4,5>
+ 2785822424U, // <0,2,3,5>: Cost 3 vsldoi12 <5,6,7,0>, <2,3,5,6>
+ 3290400698U, // <0,2,3,6>: Cost 4 vmrghw <0,3,1,0>, <2,6,3,7>
+ 2765915876U, // <0,2,3,7>: Cost 3 vsldoi12 <2,3,7,0>, <2,3,7,0>
+ 2758985453U, // <0,2,3,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,3,u,0>
+ 3291104762U, // <0,2,4,0>: Cost 4 vmrghw <0,4,1,5>, <2,0,u,0>
+ 2217362979U, // <0,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2217363048U, // <0,2,4,2>: Cost 3 vmrghw <0,4,1,5>, <2,2,2,2>
+ 2217363110U, // <0,2,4,3>: Cost 3 vmrghw <0,4,1,5>, <2,3,0,1>
+ 3291105087U, // <0,2,4,4>: Cost 4 vmrghw <0,4,1,5>, <2,4,u,1>
+ 3291105173U, // <0,2,4,5>: Cost 4 vmrghw <0,4,1,5>, <2,5,u,6>
+ 2217363386U, // <0,2,4,6>: Cost 3 vmrghw <0,4,1,5>, <2,6,3,7>
+ 3788639688U, // <0,2,4,7>: Cost 4 vsldoi8 <5,1,0,2>, <4,7,5,0>
+ 2217363515U, // <0,2,4,u>: Cost 3 vmrghw <0,4,1,5>, <2,u,0,1>
+ 3376054371U, // <0,2,5,0>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,0>
+ 3788639888U, // <0,2,5,1>: Cost 4 vsldoi8 <5,1,0,2>, <5,1,0,2>
+ 3376055912U, // <0,2,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,2,2,2>
+ 2302312550U, // <0,2,5,3>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3376054375U, // <0,2,5,4>: Cost 4 vmrglw <3,4,0,5>, <0,1,2,4>
+ 3374728244U, // <0,2,5,5>: Cost 4 vmrglw <3,2,0,5>, <1,4,2,5>
+ 3805229154U, // <0,2,5,6>: Cost 4 vsldoi8 <7,u,0,2>, <5,6,7,0>
+ 3376055512U, // <0,2,5,7>: Cost 4 vmrglw <3,4,0,5>, <1,6,2,7>
+ 2302312555U, // <0,2,5,u>: Cost 3 vmrglw <3,4,0,5>, LHS
+ 3709100134U, // <0,2,6,0>: Cost 4 vsldoi4 <3,0,2,6>, LHS
+ 3709100950U, // <0,2,6,1>: Cost 4 vsldoi4 <3,0,2,6>, <1,2,3,0>
+ 3709102010U, // <0,2,6,2>: Cost 4 vsldoi4 <3,0,2,6>, <2,6,3,7>
+ 2758985658U, // <0,2,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,3,7>
+ 3709103414U, // <0,2,6,4>: Cost 4 vsldoi4 <3,0,2,6>, RHS
+ 3732992098U, // <0,2,6,5>: Cost 4 vsldoi4 <7,0,2,6>, <5,6,7,0>
+ 3292374970U, // <0,2,6,6>: Cost 4 vmrghw <0,6,0,7>, <2,6,3,7>
+ 3798594383U, // <0,2,6,7>: Cost 4 vsldoi8 <6,7,0,2>, <6,7,0,2>
+ 2758985703U, // <0,2,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <2,6,u,7>
+ 3788641274U, // <0,2,7,0>: Cost 4 vsldoi8 <5,1,0,2>, <7,0,1,2>
+ 3377398508U, // <0,2,7,1>: Cost 4 vmrglw <3,6,0,7>, <1,0,2,1>
+ 3377398590U, // <0,2,7,2>: Cost 4 vmrglw <3,6,0,7>, <1,1,2,2>
+ 2303656038U, // <0,2,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 3709111606U, // <0,2,7,4>: Cost 4 vsldoi4 <3,0,2,7>, RHS
+ 3377398836U, // <0,2,7,5>: Cost 4 vmrglw <3,6,0,7>, <1,4,2,5>
+ 3803903447U, // <0,2,7,6>: Cost 4 vsldoi8 <7,6,0,2>, <7,6,0,2>
+ 3293054954U, // <0,2,7,7>: Cost 4 vmrghw <0,7,1,0>, <2,7,0,1>
+ 2303656043U, // <0,2,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2220074490U, // <0,2,u,0>: Cost 3 vmrghw LHS, <2,0,u,0>
+ 2220074527U, // <0,2,u,1>: Cost 3 vmrghw LHS, <2,1,3,1>
+ 1146332776U, // <0,2,u,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1146332838U, // <0,2,u,3>: Cost 2 vmrghw LHS, <2,3,0,1>
+ 2220074819U, // <0,2,u,4>: Cost 3 vmrghw LHS, <2,4,u,5>
+ 2220074901U, // <0,2,u,5>: Cost 3 vmrghw LHS, <2,5,u,6>
+ 1146333114U, // <0,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2220074986U, // <0,2,u,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1146333243U, // <0,2,u,u>: Cost 2 vmrghw LHS, <2,u,0,1>
+ 2629410816U, // <0,3,0,0>: Cost 3 vsldoi4 <2,0,3,0>, <0,0,0,0>
+ 2753530006U, // <0,3,0,1>: Cost 3 vsldoi12 <0,3,1,0>, <3,0,1,2>
+ 2629412301U, // <0,3,0,2>: Cost 3 vsldoi4 <2,0,3,0>, <2,0,3,0>
+ 2214594972U, // <0,3,0,3>: Cost 3 vmrghw <0,0,0,0>, <3,3,3,3>
+ 2758985908U, // <0,3,0,4>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,4,5>
+ 3733016674U, // <0,3,0,5>: Cost 4 vsldoi4 <7,0,3,0>, <5,6,7,0>
+ 3777364488U, // <0,3,0,6>: Cost 4 vsldoi8 <3,2,0,3>, <0,6,3,7>
+ 2281703354U, // <0,3,0,7>: Cost 3 vmrglw <0,0,0,0>, <2,6,3,7>
+ 2758985941U, // <0,3,0,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,0,u,2>
+ 1141729430U, // <0,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2215471334U, // <0,3,1,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2215471425U, // <0,3,1,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1141729692U, // <0,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1141729794U, // <0,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2215430738U, // <0,3,1,5>: Cost 3 vmrghw LHS, <3,5,5,5>
+ 2215430776U, // <0,3,1,6>: Cost 3 vmrghw LHS, <3,6,0,7>
+ 2295646138U, // <0,3,1,7>: Cost 3 vmrglw <2,3,0,1>, <2,6,3,7>
+ 1141730078U, // <0,3,1,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2758986032U, // <0,3,2,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,2,0,3>
+ 3709141910U, // <0,3,2,1>: Cost 4 vsldoi4 <3,0,3,2>, <1,2,3,0>
+ 3289753921U, // <0,3,2,2>: Cost 4 vmrghw <0,2,1,2>, <3,2,2,2>
+ 2770929992U, // <0,3,2,3>: Cost 3 vsldoi12 <3,2,3,0>, <3,2,3,0>
+ 3289754114U, // <0,3,2,4>: Cost 4 vmrghw <0,2,1,2>, <3,4,5,6>
+ 3362095460U, // <0,3,2,5>: Cost 5 vmrglw <1,1,0,2>, <0,4,3,5>
+ 3832727910U, // <0,3,2,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,2,6,3>
+ 3365414842U, // <0,3,2,7>: Cost 4 vmrglw <1,6,0,2>, <2,6,3,7>
+ 2771298677U, // <0,3,2,u>: Cost 3 vsldoi12 <3,2,u,0>, <3,2,u,0>
+ 2216659094U, // <0,3,3,0>: Cost 3 vmrghw <0,3,1,0>, <3,0,1,2>
+ 3290409190U, // <0,3,3,1>: Cost 4 vmrghw <0,3,1,1>, <3,1,1,1>
+ 2703624496U, // <0,3,3,2>: Cost 3 vsldoi8 <3,2,0,3>, <3,2,0,3>
+ 2216683932U, // <0,3,3,3>: Cost 3 vmrghw <0,3,1,3>, <3,3,3,3>
+ 2216692226U, // <0,3,3,4>: Cost 3 vmrghw <0,3,1,4>, <3,4,5,6>
+ 3733041250U, // <0,3,3,5>: Cost 4 vsldoi4 <7,0,3,3>, <5,6,7,0>
+ 3832727988U, // <0,3,3,6>: Cost 4 vsldoi12 <1,2,3,0>, <3,3,6,0>
+ 3374712762U, // <0,3,3,7>: Cost 4 vmrglw <3,2,0,3>, <2,6,3,7>
+ 2216725278U, // <0,3,3,u>: Cost 3 vmrghw <0,3,1,u>, <3,u,1,2>
+ 2217363606U, // <0,3,4,0>: Cost 3 vmrghw <0,4,1,5>, <3,0,1,2>
+ 3291105510U, // <0,3,4,1>: Cost 4 vmrghw <0,4,1,5>, <3,1,1,1>
+ 3291105601U, // <0,3,4,2>: Cost 4 vmrghw <0,4,1,5>, <3,2,2,2>
+ 2217363868U, // <0,3,4,3>: Cost 3 vmrghw <0,4,1,5>, <3,3,3,3>
+ 2217363970U, // <0,3,4,4>: Cost 3 vmrghw <0,4,1,5>, <3,4,5,6>
+ 2758986242U, // <0,3,4,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,4,5,6>
+ 3727077685U, // <0,3,4,6>: Cost 4 vsldoi4 <6,0,3,4>, <6,0,3,4>
+ 3364767674U, // <0,3,4,7>: Cost 4 vmrglw <1,5,0,4>, <2,6,3,7>
+ 2217364254U, // <0,3,4,u>: Cost 3 vmrghw <0,4,1,5>, <3,u,1,2>
+ 3832728102U, // <0,3,5,0>: Cost 4 vsldoi12 <1,2,3,0>, <3,5,0,6>
+ 3405916003U, // <0,3,5,1>: Cost 4 vmrglw <u,4,0,5>, <2,5,3,1>
+ 3376055840U, // <0,3,5,2>: Cost 4 vmrglw <3,4,0,5>, <2,1,3,2>
+ 3376055679U, // <0,3,5,3>: Cost 4 vmrglw <3,4,0,5>, <1,u,3,3>
+ 3376055194U, // <0,3,5,4>: Cost 4 vmrglw <3,4,0,5>, <1,2,3,4>
+ 3859565138U, // <0,3,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <3,5,5,5>
+ 2727514210U, // <0,3,5,6>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 3376056250U, // <0,3,5,7>: Cost 4 vmrglw <3,4,0,5>, <2,6,3,7>
+ 2727514210U, // <0,3,5,u>: Cost 3 vsldoi8 <7,2,0,3>, <5,6,7,0>
+ 2758986360U, // <0,3,6,0>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 3709174678U, // <0,3,6,1>: Cost 4 vsldoi4 <3,0,3,6>, <1,2,3,0>
+ 3795284411U, // <0,3,6,2>: Cost 4 vsldoi8 <6,2,0,3>, <6,2,0,3>
+ 3709175980U, // <0,3,6,3>: Cost 4 vsldoi4 <3,0,3,6>, <3,0,3,6>
+ 3833096860U, // <0,3,6,4>: Cost 4 vsldoi12 <1,2,u,0>, <3,6,4,7>
+ 3376728235U, // <0,3,6,5>: Cost 5 vmrglw <3,5,0,6>, <3,0,3,5>
+ 3859565229U, // <0,3,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <3,6,6,6>
+ 2773879472U, // <0,3,6,7>: Cost 3 vsldoi12 <3,6,7,0>, <3,6,7,0>
+ 2758986360U, // <0,3,6,u>: Cost 3 vsldoi12 <1,2,3,0>, <3,6,0,7>
+ 2303656854U, // <0,3,7,0>: Cost 3 vmrglw <3,6,0,7>, <1,2,3,0>
+ 3807229018U, // <0,3,7,1>: Cost 4 vsldoi8 <u,2,0,3>, <7,1,2,u>
+ 2727515284U, // <0,3,7,2>: Cost 3 vsldoi8 <7,2,0,3>, <7,2,0,3>
+ 3377399410U, // <0,3,7,3>: Cost 4 vmrglw <3,6,0,7>, <2,2,3,3>
+ 3377398682U, // <0,3,7,4>: Cost 4 vmrglw <3,6,0,7>, <1,2,3,4>
+ 3801257409U, // <0,3,7,5>: Cost 4 vsldoi8 <7,2,0,3>, <7,5,6,7>
+ 3377399980U, // <0,3,7,6>: Cost 4 vmrglw <3,6,0,7>, <3,0,3,6>
+ 3375409082U, // <0,3,7,7>: Cost 4 vmrglw <3,3,0,7>, <2,6,3,7>
+ 2731497082U, // <0,3,7,u>: Cost 3 vsldoi8 <7,u,0,3>, <7,u,0,3>
+ 1146333334U, // <0,3,u,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 2220075238U, // <0,3,u,1>: Cost 3 vmrghw LHS, <3,1,1,1>
+ 2220075329U, // <0,3,u,2>: Cost 3 vmrghw LHS, <3,2,2,2>
+ 1146333596U, // <0,3,u,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1146333698U, // <0,3,u,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2758986566U, // <0,3,u,5>: Cost 3 vsldoi12 <1,2,3,0>, <3,u,5,6>
+ 2803739472U, // <0,3,u,6>: Cost 3 vsldoi12 <u,6,7,0>, <3,u,6,7>
+ 2295703482U, // <0,3,u,7>: Cost 3 vmrglw <2,3,0,u>, <2,6,3,7>
+ 1146333982U, // <0,3,u,u>: Cost 2 vmrghw LHS, <3,u,1,2>
+ 2214595473U, // <0,4,0,0>: Cost 3 vmrghw <0,0,0,0>, <4,0,5,0>
+ 2693677158U, // <0,4,0,1>: Cost 3 vsldoi8 <1,5,0,4>, LHS
+ 3839437689U, // <0,4,0,2>: Cost 4 vsldoi12 <2,3,4,0>, <4,0,2,3>
+ 3709200559U, // <0,4,0,3>: Cost 4 vsldoi4 <3,0,4,0>, <3,0,4,0>
+ 2693677394U, // <0,4,0,4>: Cost 3 vsldoi8 <1,5,0,4>, <0,4,1,5>
+ 1140854070U, // <0,4,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 3767419409U, // <0,4,0,6>: Cost 4 vsldoi8 <1,5,0,4>, <0,6,4,7>
+ 3854109604U, // <0,4,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,0,7,1>
+ 1140854313U, // <0,4,0,u>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 1141689234U, // <0,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2215431114U, // <0,4,1,1>: Cost 3 vmrghw LHS, <4,1,2,3>
+ 2215431221U, // <0,4,1,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635466928U, // <0,4,1,3>: Cost 3 vsldoi4 <3,0,4,1>, <3,0,4,1>
+ 1141689552U, // <0,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 67947830U, // <0,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2215431545U, // <0,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2659357716U, // <0,4,1,7>: Cost 3 vsldoi4 <7,0,4,1>, <7,0,4,1>
+ 67948073U, // <0,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 3767420369U, // <0,4,2,0>: Cost 4 vsldoi8 <1,5,0,4>, <2,0,3,4>
+ 3767420451U, // <0,4,2,1>: Cost 4 vsldoi8 <1,5,0,4>, <2,1,3,5>
+ 3767420520U, // <0,4,2,2>: Cost 4 vsldoi8 <1,5,0,4>, <2,2,2,2>
+ 2698323625U, // <0,4,2,3>: Cost 3 vsldoi8 <2,3,0,4>, <2,3,0,4>
+ 3709218102U, // <0,4,2,4>: Cost 4 vsldoi4 <3,0,4,2>, RHS
+ 2216013110U, // <0,4,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767420858U, // <0,4,2,6>: Cost 4 vsldoi8 <1,5,0,4>, <2,6,3,7>
+ 3774719981U, // <0,4,2,7>: Cost 4 vsldoi8 <2,7,0,4>, <2,7,0,4>
+ 2216013353U, // <0,4,2,u>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 3767421078U, // <0,4,3,0>: Cost 4 vsldoi8 <1,5,0,4>, <3,0,1,2>
+ 3776710880U, // <0,4,3,1>: Cost 4 vsldoi8 <3,1,0,4>, <3,1,0,4>
+ 3833097325U, // <0,4,3,2>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,2,4>
+ 3767421340U, // <0,4,3,3>: Cost 4 vsldoi8 <1,5,0,4>, <3,3,3,3>
+ 3767421442U, // <0,4,3,4>: Cost 4 vsldoi8 <1,5,0,4>, <3,4,5,6>
+ 2216660278U, // <0,4,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 3833097361U, // <0,4,3,6>: Cost 5 vsldoi12 <1,2,u,0>, <4,3,6,4>
+ 3780692678U, // <0,4,3,7>: Cost 4 vsldoi8 <3,7,0,4>, <3,7,0,4>
+ 2216660521U, // <0,4,3,u>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2617573416U, // <0,4,4,0>: Cost 3 vsldoi4 <0,0,4,4>, <0,0,4,4>
+ 2217364450U, // <0,4,4,1>: Cost 3 vmrghw <0,4,1,5>, <4,1,5,0>
+ 3691316771U, // <0,4,4,2>: Cost 4 vsldoi4 <0,0,4,4>, <2,1,3,5>
+ 3709233331U, // <0,4,4,3>: Cost 4 vsldoi4 <3,0,4,4>, <3,0,4,4>
+ 2785823952U, // <0,4,4,4>: Cost 3 vsldoi12 <5,6,7,0>, <4,4,4,4>
+ 1143622966U, // <0,4,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 3691319723U, // <0,4,4,6>: Cost 4 vsldoi4 <0,0,4,4>, <6,1,7,5>
+ 3854109932U, // <0,4,4,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,4,7,5>
+ 1143623209U, // <0,4,4,u>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2635497574U, // <0,4,5,0>: Cost 3 vsldoi4 <3,0,4,5>, LHS
+ 2635498390U, // <0,4,5,1>: Cost 3 vsldoi4 <3,0,4,5>, <1,2,3,0>
+ 3709240936U, // <0,4,5,2>: Cost 4 vsldoi4 <3,0,4,5>, <2,2,2,2>
+ 2635499700U, // <0,4,5,3>: Cost 3 vsldoi4 <3,0,4,5>, <3,0,4,5>
+ 2635500854U, // <0,4,5,4>: Cost 3 vsldoi4 <3,0,4,5>, RHS
+ 2785824044U, // <0,4,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <4,5,5,6>
+ 1685245238U, // <0,4,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659390488U, // <0,4,5,7>: Cost 3 vsldoi4 <7,0,4,5>, <7,0,4,5>
+ 1685245256U, // <0,4,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 3839438161U, // <0,4,6,0>: Cost 4 vsldoi12 <2,3,4,0>, <4,6,0,7>
+ 3798610347U, // <0,4,6,1>: Cost 4 vsldoi8 <6,7,0,4>, <6,1,7,5>
+ 3798610426U, // <0,4,6,2>: Cost 4 vsldoi8 <6,7,0,4>, <6,2,7,3>
+ 3795956237U, // <0,4,6,3>: Cost 4 vsldoi8 <6,3,0,4>, <6,3,0,4>
+ 3733138742U, // <0,4,6,4>: Cost 4 vsldoi4 <7,0,4,6>, RHS
+ 2218634550U, // <0,4,6,5>: Cost 3 vmrghw <0,6,0,7>, RHS
+ 3798610744U, // <0,4,6,6>: Cost 4 vsldoi8 <6,7,0,4>, <6,6,6,6>
+ 2724868945U, // <0,4,6,7>: Cost 3 vsldoi8 <6,7,0,4>, <6,7,0,4>
+ 2725532578U, // <0,4,6,u>: Cost 3 vsldoi8 <6,u,0,4>, <6,u,0,4>
+ 3383371465U, // <0,4,7,0>: Cost 4 vmrglw <4,6,0,7>, <2,3,4,0>
+ 3800601668U, // <0,4,7,1>: Cost 4 vsldoi8 <7,1,0,4>, <7,1,0,4>
+ 3775386826U, // <0,4,7,2>: Cost 5 vsldoi8 <2,u,0,4>, <7,2,6,3>
+ 3801928934U, // <0,4,7,3>: Cost 4 vsldoi8 <7,3,0,4>, <7,3,0,4>
+ 3721202998U, // <0,4,7,4>: Cost 4 vsldoi4 <5,0,4,7>, RHS
+ 2780368328U, // <0,4,7,5>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 3383372686U, // <0,4,7,6>: Cost 5 vmrglw <4,6,0,7>, <4,0,4,6>
+ 3854110170U, // <0,4,7,7>: Cost 4 vsldoi12 <4,7,5,0>, <4,7,7,0>
+ 2780368328U, // <0,4,7,u>: Cost 3 vsldoi12 <4,7,5,0>, <4,7,5,0>
+ 1146334098U, // <0,4,u,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2220076002U, // <0,4,u,1>: Cost 3 vmrghw LHS, <4,1,5,0>
+ 2220076085U, // <0,4,u,2>: Cost 3 vmrghw LHS, <4,2,5,2>
+ 2635524279U, // <0,4,u,3>: Cost 3 vsldoi4 <3,0,4,u>, <3,0,4,u>
+ 1146334416U, // <0,4,u,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 72592694U, // <0,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 1685245481U, // <0,4,u,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2659415067U, // <0,4,u,7>: Cost 3 vsldoi4 <7,0,4,u>, <7,0,4,u>
+ 72592937U, // <0,4,u,u>: Cost 1 vmrghw LHS, RHS
+ 2281704337U, // <0,5,0,0>: Cost 3 vmrglw <0,0,0,0>, <4,0,5,0>
+ 2704965734U, // <0,5,0,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 3778707666U, // <0,5,0,2>: Cost 4 vsldoi8 <3,4,0,5>, <0,2,5,3>
+ 3778707708U, // <0,5,0,3>: Cost 4 vsldoi8 <3,4,0,5>, <0,3,1,0>
+ 2687050057U, // <0,5,0,4>: Cost 3 vsldoi8 <0,4,0,5>, <0,4,0,5>
+ 2214596612U, // <0,5,0,5>: Cost 3 vmrghw <0,0,0,0>, <5,5,5,5>
+ 2785824372U, // <0,5,0,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,0,6,1>
+ 3854110332U, // <0,5,0,7>: Cost 4 vsldoi12 <4,7,5,0>, <5,0,7,0>
+ 2704966301U, // <0,5,0,u>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 1567768678U, // <0,5,1,0>: Cost 2 vsldoi4 <4,0,5,1>, LHS
+ 2312236570U, // <0,5,1,1>: Cost 3 vmrglw <5,1,0,1>, <4,u,5,1>
+ 2215431915U, // <0,5,1,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641512598U, // <0,5,1,3>: Cost 3 vsldoi4 <4,0,5,1>, <3,0,1,2>
+ 1567771538U, // <0,5,1,4>: Cost 2 vsldoi4 <4,0,5,1>, <4,0,5,1>
+ 1141690372U, // <0,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1141690466U, // <0,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2641515514U, // <0,5,1,7>: Cost 3 vsldoi4 <4,0,5,1>, <7,0,1,2>
+ 1141690615U, // <0,5,1,u>: Cost 2 vmrghw LHS, <5,u,5,5>
+ 3772736973U, // <0,5,2,0>: Cost 4 vsldoi8 <2,4,0,5>, <2,0,3,0>
+ 3778709024U, // <0,5,2,1>: Cost 4 vsldoi8 <3,4,0,5>, <2,1,3,2>
+ 3778709096U, // <0,5,2,2>: Cost 4 vsldoi8 <3,4,0,5>, <2,2,2,2>
+ 3778709158U, // <0,5,2,3>: Cost 4 vsldoi8 <3,4,0,5>, <2,3,0,1>
+ 3772737275U, // <0,5,2,4>: Cost 4 vsldoi8 <2,4,0,5>, <2,4,0,5>
+ 3859566351U, // <0,5,2,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,2,5,3>
+ 3778709434U, // <0,5,2,6>: Cost 4 vsldoi8 <3,4,0,5>, <2,6,3,7>
+ 3805251562U, // <0,5,2,7>: Cost 4 vsldoi8 <7,u,0,5>, <2,7,0,1>
+ 3775391807U, // <0,5,2,u>: Cost 4 vsldoi8 <2,u,0,5>, <2,u,0,5>
+ 2704967830U, // <0,5,3,0>: Cost 3 vsldoi8 <3,4,0,5>, <3,0,1,2>
+ 3776719073U, // <0,5,3,1>: Cost 4 vsldoi8 <3,1,0,5>, <3,1,0,5>
+ 3777382706U, // <0,5,3,2>: Cost 4 vsldoi8 <3,2,0,5>, <3,2,0,5>
+ 3778709887U, // <0,5,3,3>: Cost 4 vsldoi8 <3,4,0,5>, <3,3,0,1>
+ 2704968148U, // <0,5,3,4>: Cost 3 vsldoi8 <3,4,0,5>, <3,4,0,5>
+ 3857428317U, // <0,5,3,5>: Cost 4 vsldoi12 <5,3,5,0>, <5,3,5,0>
+ 3364096514U, // <0,5,3,6>: Cost 4 vmrglw <1,4,0,3>, <3,4,5,6>
+ 3780700871U, // <0,5,3,7>: Cost 4 vsldoi8 <3,7,0,5>, <3,7,0,5>
+ 2707622680U, // <0,5,3,u>: Cost 3 vsldoi8 <3,u,0,5>, <3,u,0,5>
+ 2728856466U, // <0,5,4,0>: Cost 3 vsldoi8 <7,4,0,5>, <4,0,5,1>
+ 3697361674U, // <0,5,4,1>: Cost 4 vsldoi4 <1,0,5,4>, <1,0,5,4>
+ 3697362601U, // <0,5,4,2>: Cost 4 vsldoi4 <1,0,5,4>, <2,3,0,4>
+ 3364766635U, // <0,5,4,3>: Cost 4 vmrglw <1,5,0,4>, <1,2,5,3>
+ 2217365428U, // <0,5,4,4>: Cost 3 vmrghw <0,4,1,5>, <5,4,5,6>
+ 2704969014U, // <0,5,4,5>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 2785824700U, // <0,5,4,6>: Cost 3 vsldoi12 <5,6,7,0>, <5,4,6,5>
+ 3364766963U, // <0,5,4,7>: Cost 4 vmrglw <1,5,0,4>, <1,6,5,7>
+ 2704969257U, // <0,5,4,u>: Cost 3 vsldoi8 <3,4,0,5>, RHS
+ 3846148050U, // <0,5,5,0>: Cost 4 vsldoi12 <3,4,5,0>, <5,5,0,0>
+ 2326203282U, // <0,5,5,1>: Cost 3 vmrglw <7,4,0,5>, <4,0,5,1>
+ 3291746027U, // <0,5,5,2>: Cost 4 vmrghw <0,5,1,2>, <5,2,1,3>
+ 3376054482U, // <0,5,5,3>: Cost 4 vmrglw <3,4,0,5>, <0,2,5,3>
+ 3790655366U, // <0,5,5,4>: Cost 4 vsldoi8 <5,4,0,5>, <5,4,0,5>
+ 2785824772U, // <0,5,5,5>: Cost 3 vsldoi12 <5,6,7,0>, <5,5,5,5>
+ 2724876386U, // <0,5,5,6>: Cost 3 vsldoi8 <6,7,0,5>, <5,6,7,0>
+ 3858903057U, // <0,5,5,7>: Cost 4 vsldoi12 <5,5,7,0>, <5,5,7,0>
+ 2736820484U, // <0,5,5,u>: Cost 3 vsldoi8 <u,7,0,5>, <5,u,7,0>
+ 2659467366U, // <0,5,6,0>: Cost 3 vsldoi4 <7,0,5,6>, LHS
+ 3859566643U, // <0,5,6,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,6,1,7>
+ 3798618618U, // <0,5,6,2>: Cost 4 vsldoi8 <6,7,0,5>, <6,2,7,3>
+ 3852857410U, // <0,5,6,3>: Cost 4 vsldoi12 <4,5,6,0>, <5,6,3,4>
+ 2659470646U, // <0,5,6,4>: Cost 3 vsldoi4 <7,0,5,6>, RHS
+ 2659471458U, // <0,5,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 3832729696U, // <0,5,6,6>: Cost 4 vsldoi12 <1,2,3,0>, <5,6,6,7>
+ 1712083042U, // <0,5,6,7>: Cost 2 vsldoi12 <5,6,7,0>, <5,6,7,0>
+ 1712156779U, // <0,5,6,u>: Cost 2 vsldoi12 <5,6,u,0>, <5,6,u,0>
+ 2731512826U, // <0,5,7,0>: Cost 3 vsldoi8 <7,u,0,5>, <7,0,1,2>
+ 3859566717U, // <0,5,7,1>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,1,0>
+ 3798619284U, // <0,5,7,2>: Cost 4 vsldoi8 <6,7,0,5>, <7,2,0,3>
+ 3778712803U, // <0,5,7,3>: Cost 4 vsldoi8 <3,4,0,5>, <7,3,0,1>
+ 2728858936U, // <0,5,7,4>: Cost 3 vsldoi8 <7,4,0,5>, <7,4,0,5>
+ 3859566753U, // <0,5,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <5,7,5,0>
+ 3377398135U, // <0,5,7,6>: Cost 4 vmrglw <3,6,0,7>, <0,4,5,6>
+ 3798619686U, // <0,5,7,7>: Cost 4 vsldoi8 <6,7,0,5>, <7,7,0,0>
+ 2731513468U, // <0,5,7,u>: Cost 3 vsldoi8 <7,u,0,5>, <7,u,0,5>
+ 1567826022U, // <0,5,u,0>: Cost 2 vsldoi4 <4,0,5,u>, LHS
+ 2704971566U, // <0,5,u,1>: Cost 3 vsldoi8 <3,4,0,5>, LHS
+ 2220076779U, // <0,5,u,2>: Cost 3 vmrghw LHS, <5,2,1,3>
+ 2641569942U, // <0,5,u,3>: Cost 3 vsldoi4 <4,0,5,u>, <3,0,1,2>
+ 1567828889U, // <0,5,u,4>: Cost 2 vsldoi4 <4,0,5,u>, <4,0,5,u>
+ 1146335236U, // <0,5,u,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1146335330U, // <0,5,u,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 1713410308U, // <0,5,u,7>: Cost 2 vsldoi12 <5,u,7,0>, <5,u,7,0>
+ 1713484045U, // <0,5,u,u>: Cost 2 vsldoi12 <5,u,u,0>, <5,u,u,0>
+ 2214596949U, // <0,6,0,0>: Cost 3 vmrghw <0,0,0,0>, <6,0,7,0>
+ 2214678951U, // <0,6,0,1>: Cost 3 vmrghw <0,0,1,1>, <6,1,7,1>
+ 2214597114U, // <0,6,0,2>: Cost 3 vmrghw <0,0,0,0>, <6,2,7,3>
+ 3852857653U, // <0,6,0,3>: Cost 4 vsldoi12 <4,5,6,0>, <6,0,3,4>
+ 3832729919U, // <0,6,0,4>: Cost 4 vsldoi12 <1,2,3,0>, <6,0,4,5>
+ 3721293427U, // <0,6,0,5>: Cost 4 vsldoi4 <5,0,6,0>, <5,0,6,0>
+ 2214597432U, // <0,6,0,6>: Cost 3 vmrghw <0,0,0,0>, <6,6,6,6>
+ 1207962934U, // <0,6,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 1207962935U, // <0,6,0,u>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 2215432481U, // <0,6,1,0>: Cost 3 vmrghw LHS, <6,0,1,2>
+ 2215432615U, // <0,6,1,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1141690874U, // <0,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2215432754U, // <0,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2215432817U, // <0,6,1,4>: Cost 3 vmrghw LHS, <6,4,2,5>
+ 2215432939U, // <0,6,1,5>: Cost 3 vmrghw LHS, <6,5,7,1>
+ 1141691192U, // <0,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221905718U, // <0,6,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 1221905719U, // <0,6,1,u>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 3852857787U, // <0,6,2,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,2,0,3>
+ 3289764265U, // <0,6,2,1>: Cost 4 vmrghw <0,2,1,3>, <6,1,7,3>
+ 3289690618U, // <0,6,2,2>: Cost 4 vmrghw <0,2,0,3>, <6,2,7,3>
+ 3862589907U, // <0,6,2,3>: Cost 4 vsldoi12 <6,2,3,0>, <6,2,3,0>
+ 3733253430U, // <0,6,2,4>: Cost 4 vsldoi4 <7,0,6,2>, RHS
+ 3733254242U, // <0,6,2,5>: Cost 4 vsldoi4 <7,0,6,2>, <5,6,7,0>
+ 3777390522U, // <0,6,2,6>: Cost 4 vsldoi8 <3,2,0,6>, <2,6,3,7>
+ 2785825274U, // <0,6,2,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,7,3>
+ 2785825283U, // <0,6,2,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,2,u,3>
+ 3777390742U, // <0,6,3,0>: Cost 4 vsldoi8 <3,2,0,6>, <3,0,1,2>
+ 3863106066U, // <0,6,3,1>: Cost 4 vsldoi12 <6,3,1,0>, <6,3,1,0>
+ 3777390899U, // <0,6,3,2>: Cost 4 vsldoi8 <3,2,0,6>, <3,2,0,6>
+ 3290436146U, // <0,6,3,3>: Cost 4 vmrghw <0,3,1,4>, <6,3,4,5>
+ 3779381762U, // <0,6,3,4>: Cost 4 vsldoi8 <3,5,0,6>, <3,4,5,6>
+ 3779381798U, // <0,6,3,5>: Cost 4 vsldoi8 <3,5,0,6>, <3,5,0,6>
+ 3733262920U, // <0,6,3,6>: Cost 4 vsldoi4 <7,0,6,3>, <6,3,7,0>
+ 2300972342U, // <0,6,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2300972343U, // <0,6,3,u>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 3802606482U, // <0,6,4,0>: Cost 4 vsldoi8 <7,4,0,6>, <4,0,5,1>
+ 2217365931U, // <0,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2217366010U, // <0,6,4,2>: Cost 3 vmrghw <0,4,1,5>, <6,2,7,3>
+ 3291107890U, // <0,6,4,3>: Cost 4 vmrghw <0,4,1,5>, <6,3,4,5>
+ 3291099805U, // <0,6,4,4>: Cost 4 vmrghw <0,4,1,4>, <6,4,7,4>
+ 3777391926U, // <0,6,4,5>: Cost 4 vsldoi8 <3,2,0,6>, RHS
+ 2217366328U, // <0,6,4,6>: Cost 3 vmrghw <0,4,1,5>, <6,6,6,6>
+ 2291027254U, // <0,6,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 2291027255U, // <0,6,4,u>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 3852858033U, // <0,6,5,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,5,0,6>
+ 3395964532U, // <0,6,5,1>: Cost 4 vmrglw <6,7,0,5>, <5,0,6,1>
+ 3864507069U, // <0,6,5,2>: Cost 4 vsldoi12 <6,5,2,0>, <6,5,2,0>
+ 3376056678U, // <0,6,5,3>: Cost 5 vmrglw <3,4,0,5>, <3,2,6,3>
+ 3721334070U, // <0,6,5,4>: Cost 4 vsldoi4 <5,0,6,5>, RHS
+ 3395964860U, // <0,6,5,5>: Cost 4 vmrglw <6,7,0,5>, <5,4,6,5>
+ 3864802017U, // <0,6,5,6>: Cost 4 vsldoi12 <6,5,6,0>, <6,5,6,0>
+ 2302315830U, // <0,6,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 2302315831U, // <0,6,5,u>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 3852858108U, // <0,6,6,0>: Cost 4 vsldoi12 <4,5,6,0>, <6,6,0,0>
+ 3398624745U, // <0,6,6,1>: Cost 4 vmrglw <7,2,0,6>, <2,0,6,1>
+ 2218668538U, // <0,6,6,2>: Cost 3 vmrghw <0,6,1,2>, <6,2,7,3>
+ 3292418610U, // <0,6,6,3>: Cost 4 vmrghw <0,6,1,3>, <6,3,4,5>
+ 3733286198U, // <0,6,6,4>: Cost 4 vsldoi4 <7,0,6,6>, RHS
+ 3797299889U, // <0,6,6,5>: Cost 4 vsldoi8 <6,5,0,6>, <6,5,0,6>
+ 2785825592U, // <0,6,6,6>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,6,6>
+ 2785825602U, // <0,6,6,7>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,7,7>
+ 2785825611U, // <0,6,6,u>: Cost 3 vsldoi12 <5,6,7,0>, <6,6,u,7>
+ 2785825614U, // <0,6,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,0,1>
+ 2758988632U, // <0,6,7,1>: Cost 3 vsldoi12 <1,2,3,0>, <6,7,1,2>
+ 3377400084U, // <0,6,7,2>: Cost 4 vmrglw <3,6,0,7>, <3,1,6,2>
+ 2792166248U, // <0,6,7,3>: Cost 3 vsldoi12 <6,7,3,0>, <6,7,3,0>
+ 2785825654U, // <0,6,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,4,5>
+ 2785825664U, // <0,6,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 3859567493U, // <0,6,7,6>: Cost 4 vsldoi12 <5,6,7,0>, <6,7,6,2>
+ 2303659318U, // <0,6,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303659319U, // <0,6,7,u>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2785825695U, // <0,6,u,0>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,0,1>
+ 2220077479U, // <0,6,u,1>: Cost 3 vmrghw LHS, <6,1,7,1>
+ 1146335738U, // <0,6,u,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2792829881U, // <0,6,u,3>: Cost 3 vsldoi12 <6,u,3,0>, <6,u,3,0>
+ 2785825735U, // <0,6,u,4>: Cost 3 vsldoi12 <5,6,7,0>, <6,u,4,5>
+ 2785825664U, // <0,6,u,5>: Cost 3 vsldoi12 <5,6,7,0>, <6,7,5,6>
+ 1146336056U, // <0,6,u,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1221963062U, // <0,6,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 1221963063U, // <0,6,u,u>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 2653593600U, // <0,7,0,0>: Cost 3 vsldoi4 <6,0,7,0>, <0,0,0,0>
+ 2706309222U, // <0,7,0,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 3709421498U, // <0,7,0,2>: Cost 4 vsldoi4 <3,0,7,0>, <2,6,3,7>
+ 2281705978U, // <0,7,0,3>: Cost 3 vmrglw <0,0,0,0>, <6,2,7,3>
+ 2785825816U, // <0,7,0,4>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,4,5>
+ 2785825826U, // <0,7,0,5>: Cost 3 vsldoi12 <5,6,7,0>, <7,0,5,6>
+ 2653598037U, // <0,7,0,6>: Cost 3 vsldoi4 <6,0,7,0>, <6,0,7,0>
+ 2214598252U, // <0,7,0,7>: Cost 3 vmrghw <0,0,0,0>, <7,7,7,7>
+ 2706309789U, // <0,7,0,u>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 1141691386U, // <0,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2215433290U, // <0,7,1,1>: Cost 3 vmrghw LHS, <7,1,1,1>
+ 2706310038U, // <0,7,1,2>: Cost 3 vsldoi8 <3,6,0,7>, <1,2,3,0>
+ 2322190842U, // <0,7,1,3>: Cost 3 vmrglw <6,7,0,1>, <6,2,7,3>
+ 1141691750U, // <0,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2215433654U, // <0,7,1,5>: Cost 3 vmrghw LHS, <7,5,5,5>
+ 2653606230U, // <0,7,1,6>: Cost 3 vsldoi4 <6,0,7,1>, <6,0,7,1>
+ 1141692012U, // <0,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1141692034U, // <0,7,1,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 2785825940U, // <0,7,2,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,2,0,3>
+ 3768108576U, // <0,7,2,1>: Cost 5 vsldoi8 <1,6,0,7>, <2,1,3,2>
+ 3780052584U, // <0,7,2,2>: Cost 4 vsldoi8 <3,6,0,7>, <2,2,2,2>
+ 2794820780U, // <0,7,2,3>: Cost 3 vsldoi12 <7,2,3,0>, <7,2,3,0>
+ 3859641528U, // <0,7,2,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,2,4,3>
+ 3733327970U, // <0,7,2,5>: Cost 4 vsldoi4 <7,0,7,2>, <5,6,7,0>
+ 3778062266U, // <0,7,2,6>: Cost 4 vsldoi8 <3,3,0,7>, <2,6,3,7>
+ 3733328944U, // <0,7,2,7>: Cost 4 vsldoi4 <7,0,7,2>, <7,0,7,2>
+ 2795189465U, // <0,7,2,u>: Cost 3 vsldoi12 <7,2,u,0>, <7,2,u,0>
+ 2324861026U, // <0,7,3,0>: Cost 3 vmrglw <7,2,0,3>, <5,6,7,0>
+ 3780053233U, // <0,7,3,1>: Cost 4 vsldoi8 <3,6,0,7>, <3,1,2,3>
+ 3780053296U, // <0,7,3,2>: Cost 4 vsldoi8 <3,6,0,7>, <3,2,0,3>
+ 3778062725U, // <0,7,3,3>: Cost 4 vsldoi8 <3,3,0,7>, <3,3,0,7>
+ 3780053506U, // <0,7,3,4>: Cost 4 vsldoi8 <3,6,0,7>, <3,4,5,6>
+ 3803941469U, // <0,7,3,5>: Cost 4 vsldoi8 <7,6,0,7>, <3,5,6,7>
+ 2706311800U, // <0,7,3,6>: Cost 3 vsldoi8 <3,6,0,7>, <3,6,0,7>
+ 3398603586U, // <0,7,3,7>: Cost 4 vmrglw <7,2,0,3>, <6,6,7,7>
+ 2707639066U, // <0,7,3,u>: Cost 3 vsldoi8 <3,u,0,7>, <3,u,0,7>
+ 2217366522U, // <0,7,4,0>: Cost 3 vmrghw <0,4,1,5>, <7,0,1,2>
+ 3727369110U, // <0,7,4,1>: Cost 4 vsldoi4 <6,0,7,4>, <1,2,3,0>
+ 3291108500U, // <0,7,4,2>: Cost 4 vmrghw <0,4,1,5>, <7,2,0,3>
+ 3727370872U, // <0,7,4,3>: Cost 4 vsldoi4 <6,0,7,4>, <3,6,0,7>
+ 2217366886U, // <0,7,4,4>: Cost 3 vmrghw <0,4,1,5>, <7,4,5,6>
+ 2706312502U, // <0,7,4,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 3786026321U, // <0,7,4,6>: Cost 4 vsldoi8 <4,6,0,7>, <4,6,0,7>
+ 2217367148U, // <0,7,4,7>: Cost 3 vmrghw <0,4,1,5>, <7,7,7,7>
+ 2706312745U, // <0,7,4,u>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2322223202U, // <0,7,5,0>: Cost 3 vmrglw <6,7,0,5>, <5,6,7,0>
+ 3399946987U, // <0,7,5,1>: Cost 4 vmrglw <7,4,0,5>, <6,5,7,1>
+ 3291780244U, // <0,7,5,2>: Cost 4 vmrghw <0,5,1,6>, <7,2,0,3>
+ 3727378582U, // <0,7,5,3>: Cost 4 vsldoi4 <6,0,7,5>, <3,0,1,2>
+ 3727379766U, // <0,7,5,4>: Cost 4 vsldoi4 <6,0,7,5>, RHS
+ 3859568054U, // <0,7,5,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,5,5,5>
+ 2785826241U, // <0,7,5,6>: Cost 3 vsldoi12 <5,6,7,0>, <7,5,6,7>
+ 3395965762U, // <0,7,5,7>: Cost 4 vmrglw <6,7,0,5>, <6,6,7,7>
+ 2787153363U, // <0,7,5,u>: Cost 3 vsldoi12 <5,u,7,0>, <7,5,u,7>
+ 2785826268U, // <0,7,6,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,6,0,7>
+ 3780055420U, // <0,7,6,1>: Cost 5 vsldoi8 <3,6,0,7>, <6,1,2,3>
+ 3859568110U, // <0,7,6,2>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,2,7>
+ 3874534903U, // <0,7,6,3>: Cost 4 vsldoi12 <u,2,3,0>, <7,6,3,7>
+ 3859641856U, // <0,7,6,4>: Cost 4 vsldoi12 <5,6,u,0>, <7,6,4,7>
+ 3733360738U, // <0,7,6,5>: Cost 4 vsldoi4 <7,0,7,6>, <5,6,7,0>
+ 3859568145U, // <0,7,6,6>: Cost 4 vsldoi12 <5,6,7,0>, <7,6,6,6>
+ 2797770260U, // <0,7,6,7>: Cost 3 vsldoi12 <7,6,7,0>, <7,6,7,0>
+ 2797843997U, // <0,7,6,u>: Cost 3 vsldoi12 <7,6,u,0>, <7,6,u,0>
+ 2785826342U, // <0,7,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,0,0>
+ 3727393686U, // <0,7,7,1>: Cost 4 vsldoi4 <6,0,7,7>, <1,2,3,0>
+ 3868563003U, // <0,7,7,2>: Cost 4 vsldoi12 <7,2,3,0>, <7,7,2,3>
+ 3377397988U, // <0,7,7,3>: Cost 4 vmrglw <3,6,0,7>, <0,2,7,3>
+ 2219349350U, // <0,7,7,4>: Cost 3 vmrghw <0,7,1,4>, <7,4,5,6>
+ 3859568217U, // <0,7,7,5>: Cost 4 vsldoi12 <5,6,7,0>, <7,7,5,6>
+ 2730202588U, // <0,7,7,6>: Cost 3 vsldoi8 <7,6,0,7>, <7,6,0,7>
+ 2785826412U, // <0,7,7,7>: Cost 3 vsldoi12 <5,6,7,0>, <7,7,7,7>
+ 2731529854U, // <0,7,7,u>: Cost 3 vsldoi8 <7,u,0,7>, <7,u,0,7>
+ 1146336250U, // <0,7,u,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 2706315054U, // <0,7,u,1>: Cost 3 vsldoi8 <3,6,0,7>, LHS
+ 2653660845U, // <0,7,u,2>: Cost 3 vsldoi4 <6,0,7,u>, <2,3,0,u>
+ 2322248186U, // <0,7,u,3>: Cost 3 vmrglw <6,7,0,u>, <6,2,7,3>
+ 1146336614U, // <0,7,u,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2706315418U, // <0,7,u,5>: Cost 3 vsldoi8 <3,6,0,7>, RHS
+ 2653663581U, // <0,7,u,6>: Cost 3 vsldoi4 <6,0,7,u>, <6,0,7,u>
+ 1146336876U, // <0,7,u,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1146336898U, // <0,7,u,u>: Cost 2 vmrghw LHS, <7,u,1,2>
+ 202162278U, // <0,u,0,0>: Cost 1 vspltisw0 LHS
+ 1624612966U, // <0,u,0,1>: Cost 2 vsldoi8 <2,3,0,u>, LHS
+ 2629780986U, // <0,u,0,2>: Cost 3 vsldoi4 <2,0,u,0>, <2,0,u,0>
+ 1207959708U, // <0,u,0,3>: Cost 2 vmrglw <0,0,0,0>, LHS
+ 1544097078U, // <0,u,0,4>: Cost 2 vsldoi4 <0,0,u,0>, RHS
+ 1140856986U, // <0,u,0,5>: Cost 2 vmrghw <0,0,0,0>, RHS
+ 2698355253U, // <0,u,0,6>: Cost 3 vsldoi8 <2,3,0,u>, <0,6,u,7>
+ 1207962952U, // <0,u,0,7>: Cost 2 vmrglw <0,0,0,0>, RHS
+ 202162278U, // <0,u,0,u>: Cost 1 vspltisw0 LHS
+ 1142134483U, // <0,u,1,0>: Cost 2 vmrghw LHS, <u,0,1,2>
+ 67950382U, // <0,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 1142175624U, // <0,u,1,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 1142175676U, // <0,u,1,3>: Cost 2 vmrghw LHS, <u,3,0,1>
+ 1142134847U, // <0,u,1,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 67950746U, // <0,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1142175952U, // <0,u,1,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221905736U, // <0,u,1,7>: Cost 2 vmrglw <2,3,0,1>, RHS
+ 67950949U, // <0,u,1,u>: Cost 1 vmrghw LHS, LHS
+ 1562026086U, // <0,u,2,0>: Cost 2 vsldoi4 <3,0,u,2>, LHS
+ 2216015662U, // <0,u,2,1>: Cost 3 vmrghw <0,2,1,2>, LHS
+ 2698356328U, // <0,u,2,2>: Cost 3 vsldoi8 <2,3,0,u>, <2,2,2,2>
+ 835584U, // <0,u,2,3>: Cost 0 copy LHS
+ 1562029366U, // <0,u,2,4>: Cost 2 vsldoi4 <3,0,u,2>, RHS
+ 2216016026U, // <0,u,2,5>: Cost 3 vmrghw <0,2,1,2>, RHS
+ 2698356666U, // <0,u,2,6>: Cost 3 vsldoi8 <2,3,0,u>, <2,6,3,7>
+ 1585919033U, // <0,u,2,7>: Cost 2 vsldoi4 <7,0,u,2>, <7,0,u,2>
+ 835584U, // <0,u,2,u>: Cost 0 copy LHS
+ 2758989756U, // <0,u,3,0>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,0,1>
+ 2216662830U, // <0,u,3,1>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2703665461U, // <0,u,3,2>: Cost 3 vsldoi8 <3,2,0,u>, <3,2,0,u>
+ 2758989782U, // <0,u,3,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,3,0>
+ 2758989796U, // <0,u,3,4>: Cost 3 vsldoi12 <1,2,3,0>, <u,3,4,5>
+ 2216663194U, // <0,u,3,5>: Cost 3 vmrghw <0,3,1,0>, RHS
+ 2706319993U, // <0,u,3,6>: Cost 3 vsldoi8 <3,6,0,u>, <3,6,0,u>
+ 2300972360U, // <0,u,3,7>: Cost 3 vmrglw <3,2,0,3>, RHS
+ 2216663397U, // <0,u,3,u>: Cost 3 vmrghw <0,3,1,0>, LHS
+ 2217367251U, // <0,u,4,0>: Cost 3 vmrghw <0,4,1,5>, <u,0,1,2>
+ 1143625518U, // <0,u,4,1>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2217367432U, // <0,u,4,2>: Cost 3 vmrghw <0,4,1,5>, <u,2,3,3>
+ 2217367484U, // <0,u,4,3>: Cost 3 vmrghw <0,4,1,5>, <u,3,0,1>
+ 1143619922U, // <0,u,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1143625882U, // <0,u,4,5>: Cost 2 vmrghw <0,4,1,5>, RHS
+ 2217367760U, // <0,u,4,6>: Cost 3 vmrghw <0,4,1,5>, <u,6,3,7>
+ 2291027272U, // <0,u,4,7>: Cost 3 vmrglw <1,5,0,4>, RHS
+ 1143626085U, // <0,u,4,u>: Cost 2 vmrghw <0,4,1,5>, LHS
+ 2635792486U, // <0,u,5,0>: Cost 3 vsldoi4 <3,0,u,5>, LHS
+ 2635793302U, // <0,u,5,1>: Cost 3 vsldoi4 <3,0,u,5>, <1,2,3,0>
+ 2302314646U, // <0,u,5,2>: Cost 3 vmrglw <3,4,0,5>, <3,0,1,2>
+ 2635794648U, // <0,u,5,3>: Cost 3 vsldoi4 <3,0,u,5>, <3,0,u,5>
+ 2635795766U, // <0,u,5,4>: Cost 3 vsldoi4 <3,0,u,5>, RHS
+ 2717601754U, // <0,u,5,5>: Cost 3 vsldoi8 <5,5,0,u>, <5,5,0,u>
+ 1685248154U, // <0,u,5,6>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2302315848U, // <0,u,5,7>: Cost 3 vmrglw <3,4,0,5>, RHS
+ 1685248172U, // <0,u,5,u>: Cost 2 vsldoi12 <1,2,3,0>, RHS
+ 2759358645U, // <0,u,6,0>: Cost 3 vsldoi12 <1,2,u,0>, <u,6,0,7>
+ 2218637102U, // <0,u,6,1>: Cost 3 vmrghw <0,6,0,7>, LHS
+ 2724901370U, // <0,u,6,2>: Cost 3 vsldoi8 <6,7,0,u>, <6,2,7,3>
+ 2758990032U, // <0,u,6,3>: Cost 3 vsldoi12 <1,2,3,0>, <u,6,3,7>
+ 2659691830U, // <0,u,6,4>: Cost 3 vsldoi4 <7,0,u,6>, RHS
+ 2659471458U, // <0,u,6,5>: Cost 3 vsldoi4 <7,0,5,6>, <5,6,7,0>
+ 2724901688U, // <0,u,6,6>: Cost 3 vsldoi8 <6,7,0,u>, <6,6,6,6>
+ 1651159893U, // <0,u,6,7>: Cost 2 vsldoi8 <6,7,0,u>, <6,7,0,u>
+ 1651823526U, // <0,u,6,u>: Cost 2 vsldoi8 <6,u,0,u>, <6,u,0,u>
+ 2785827072U, // <0,u,7,0>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,0,1>
+ 2803964168U, // <0,u,7,1>: Cost 3 vsldoi12 <u,7,1,0>, <u,7,1,0>
+ 2727556249U, // <0,u,7,2>: Cost 3 vsldoi8 <7,2,0,u>, <7,2,0,u>
+ 2303656092U, // <0,u,7,3>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 2785827112U, // <0,u,7,4>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,4,5>
+ 2785827122U, // <0,u,7,5>: Cost 3 vsldoi12 <5,6,7,0>, <u,7,5,6>
+ 2730210781U, // <0,u,7,6>: Cost 3 vsldoi8 <7,6,0,u>, <7,6,0,u>
+ 2303659336U, // <0,u,7,7>: Cost 3 vmrglw <3,6,0,7>, RHS
+ 2303656097U, // <0,u,7,u>: Cost 3 vmrglw <3,6,0,7>, LHS
+ 202162278U, // <0,u,u,0>: Cost 1 vspltisw0 LHS
+ 72595246U, // <0,u,u,1>: Cost 1 vmrghw LHS, LHS
+ 1146337160U, // <0,u,u,2>: Cost 2 vmrghw LHS, <u,2,3,3>
+ 835584U, // <0,u,u,3>: Cost 0 copy LHS
+ 1146337343U, // <0,u,u,4>: Cost 2 vmrghw LHS, <u,4,5,6>
+ 72595610U, // <0,u,u,5>: Cost 1 vmrghw LHS, RHS
+ 1146337488U, // <0,u,u,6>: Cost 2 vmrghw LHS, <u,6,3,7>
+ 1221963080U, // <0,u,u,7>: Cost 2 vmrglw <2,3,0,u>, RHS
+ 835584U, // <0,u,u,u>: Cost 0 copy LHS
+ 2756853760U, // <1,0,0,0>: Cost 3 vsldoi12 <0,u,1,1>, <0,0,0,0>
+ 1677803530U, // <1,0,0,1>: Cost 2 vsldoi12 <0,0,1,1>, <0,0,1,1>
+ 3759497387U, // <1,0,0,2>: Cost 4 vsldoi8 <0,2,1,0>, <0,2,1,0>
+ 2686419196U, // <1,0,0,3>: Cost 3 vsldoi8 <0,3,1,0>, <0,3,1,0>
+ 2751766565U, // <1,0,0,4>: Cost 3 vsldoi12 <0,0,4,1>, <0,0,4,1>
+ 2687746462U, // <1,0,0,5>: Cost 3 vsldoi8 <0,5,1,0>, <0,5,1,0>
+ 3776086518U, // <1,0,0,6>: Cost 4 vsldoi8 <3,0,1,0>, <0,6,1,7>
+ 2689073728U, // <1,0,0,7>: Cost 3 vsldoi8 <0,7,1,0>, <0,7,1,0>
+ 1678319689U, // <1,0,0,u>: Cost 2 vsldoi12 <0,0,u,1>, <0,0,u,1>
+ 2287091712U, // <1,0,1,0>: Cost 3 vmrglw <0,u,1,1>, <0,0,0,0>
+ 1147568230U, // <1,0,1,1>: Cost 2 vmrghw <1,1,1,1>, LHS
+ 1683112038U, // <1,0,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 3294970108U, // <1,0,1,3>: Cost 4 vmrghw <1,1,0,0>, <0,3,1,0>
+ 2623892790U, // <1,0,1,4>: Cost 3 vsldoi4 <1,1,0,1>, RHS
+ 2647781007U, // <1,0,1,5>: Cost 3 vsldoi4 <5,1,0,1>, <5,1,0,1>
+ 2791948430U, // <1,0,1,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 3721524218U, // <1,0,1,7>: Cost 4 vsldoi4 <5,1,0,1>, <7,0,1,2>
+ 1683112092U, // <1,0,1,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2222112768U, // <1,0,2,0>: Cost 3 vmrghw <1,2,3,0>, <0,0,0,0>
+ 1148371046U, // <1,0,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 3356862524U, // <1,0,2,2>: Cost 4 vmrglw <0,2,1,2>, <2,u,0,2>
+ 2702345894U, // <1,0,2,3>: Cost 3 vsldoi8 <3,0,1,0>, <2,3,0,1>
+ 2222113106U, // <1,0,2,4>: Cost 3 vmrghw <1,2,3,0>, <0,4,1,5>
+ 2299709908U, // <1,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 3760162746U, // <1,0,2,6>: Cost 4 vsldoi8 <0,3,1,0>, <2,6,3,7>
+ 3369470584U, // <1,0,2,7>: Cost 4 vmrglw <2,3,1,2>, <3,6,0,7>
+ 1148371613U, // <1,0,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 2686421142U, // <1,0,3,0>: Cost 3 vsldoi8 <0,3,1,0>, <3,0,1,2>
+ 2283128486U, // <1,0,3,1>: Cost 3 vmrglw <0,2,1,3>, <2,3,0,1>
+ 3296305326U, // <1,0,3,2>: Cost 4 vmrghw <1,3,0,1>, <0,2,1,3>
+ 3760163199U, // <1,0,3,3>: Cost 4 vsldoi8 <0,3,1,0>, <3,3,0,1>
+ 3760163330U, // <1,0,3,4>: Cost 4 vsldoi8 <0,3,1,0>, <3,4,5,6>
+ 3779406377U, // <1,0,3,5>: Cost 4 vsldoi8 <3,5,1,0>, <3,5,1,0>
+ 3865690416U, // <1,0,3,6>: Cost 4 vsldoi12 <6,7,0,1>, <0,3,6,7>
+ 3366824568U, // <1,0,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,0,7>
+ 2707655452U, // <1,0,3,u>: Cost 3 vsldoi8 <3,u,1,0>, <3,u,1,0>
+ 2734861202U, // <1,0,4,0>: Cost 3 vsldoi8 <u,4,1,0>, <4,0,5,1>
+ 2756854098U, // <1,0,4,1>: Cost 3 vsldoi12 <0,u,1,1>, <0,4,1,5>
+ 3830595931U, // <1,0,4,2>: Cost 5 vsldoi12 <0,u,1,1>, <0,4,2,5>
+ 3296968960U, // <1,0,4,3>: Cost 4 vmrghw <1,4,0,1>, <0,3,1,4>
+ 3830595949U, // <1,0,4,4>: Cost 4 vsldoi12 <0,u,1,1>, <0,4,4,5>
+ 2686422326U, // <1,0,4,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 3297378806U, // <1,0,4,6>: Cost 5 vmrghw <1,4,5,6>, <0,6,1,7>
+ 3810594248U, // <1,0,4,7>: Cost 4 vsldoi8 <u,7,1,0>, <4,7,5,0>
+ 2686422569U, // <1,0,4,u>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2284470272U, // <1,0,5,0>: Cost 3 vmrglw <0,4,1,5>, <0,0,0,0>
+ 2284471974U, // <1,0,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,1>
+ 3809267435U, // <1,0,5,2>: Cost 4 vsldoi8 <u,5,1,0>, <5,2,1,3>
+ 3297968384U, // <1,0,5,3>: Cost 4 vmrghw <1,5,4,6>, <0,3,1,4>
+ 2284471977U, // <1,0,5,4>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,4>
+ 3721555603U, // <1,0,5,5>: Cost 4 vsldoi4 <5,1,0,5>, <5,1,0,5>
+ 3792679010U, // <1,0,5,6>: Cost 4 vsldoi8 <5,7,1,0>, <5,6,7,0>
+ 3792679037U, // <1,0,5,7>: Cost 4 vsldoi8 <5,7,1,0>, <5,7,1,0>
+ 2284471981U, // <1,0,5,u>: Cost 3 vmrglw <0,4,1,5>, <2,3,0,u>
+ 3356893184U, // <1,0,6,0>: Cost 4 vmrglw <0,2,1,6>, <0,0,0,0>
+ 2224676966U, // <1,0,6,1>: Cost 3 vmrghw <1,6,1,7>, LHS
+ 3298295985U, // <1,0,6,2>: Cost 4 vmrghw <1,6,0,1>, <0,2,1,6>
+ 3298345212U, // <1,0,6,3>: Cost 4 vmrghw <1,6,0,7>, <0,3,1,0>
+ 2224972114U, // <1,0,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 3808604907U, // <1,0,6,5>: Cost 4 vsldoi8 <u,4,1,0>, <6,5,7,1>
+ 3799978808U, // <1,0,6,6>: Cost 4 vsldoi8 <7,0,1,0>, <6,6,6,6>
+ 2726237006U, // <1,0,6,7>: Cost 3 vsldoi8 <7,0,1,0>, <6,7,0,1>
+ 2224677522U, // <1,0,6,u>: Cost 3 vmrghw <1,6,1,7>, <0,u,1,1>
+ 2726237176U, // <1,0,7,0>: Cost 3 vsldoi8 <7,0,1,0>, <7,0,1,0>
+ 2285815462U, // <1,0,7,1>: Cost 3 vmrglw <0,6,1,7>, <2,3,0,1>
+ 3805951193U, // <1,0,7,2>: Cost 4 vsldoi8 <u,0,1,0>, <7,2,u,0>
+ 3807941859U, // <1,0,7,3>: Cost 4 vsldoi8 <u,3,1,0>, <7,3,0,1>
+ 3799979366U, // <1,0,7,4>: Cost 4 vsldoi8 <7,0,1,0>, <7,4,5,6>
+ 3803297165U, // <1,0,7,5>: Cost 4 vsldoi8 <7,5,1,0>, <7,5,1,0>
+ 3799979540U, // <1,0,7,6>: Cost 4 vsldoi8 <7,0,1,0>, <7,6,7,0>
+ 3799979628U, // <1,0,7,7>: Cost 4 vsldoi8 <7,0,1,0>, <7,7,7,7>
+ 2731546240U, // <1,0,7,u>: Cost 3 vsldoi8 <7,u,1,0>, <7,u,1,0>
+ 2284494848U, // <1,0,u,0>: Cost 3 vmrglw <0,4,1,u>, <0,0,0,0>
+ 1683112594U, // <1,0,u,1>: Cost 2 vsldoi12 <0,u,1,1>, <0,u,1,1>
+ 1683112605U, // <1,0,u,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 2734200772U, // <1,0,u,3>: Cost 3 vsldoi8 <u,3,1,0>, <u,3,1,0>
+ 2757075629U, // <1,0,u,4>: Cost 3 vsldoi12 <0,u,4,1>, <0,u,4,1>
+ 2686425242U, // <1,0,u,5>: Cost 3 vsldoi8 <0,3,1,0>, RHS
+ 2791948430U, // <1,0,u,6>: Cost 3 vsldoi12 <6,7,0,1>, <0,1,6,7>
+ 2736855304U, // <1,0,u,7>: Cost 3 vsldoi8 <u,7,1,0>, <u,7,1,0>
+ 1683112659U, // <1,0,u,u>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1610694666U, // <1,1,0,0>: Cost 2 vsldoi8 <0,0,1,1>, <0,0,1,1>
+ 1616003174U, // <1,1,0,1>: Cost 2 vsldoi8 <0,u,1,1>, LHS
+ 2283767958U, // <1,1,0,2>: Cost 3 vmrglw <0,3,1,0>, <3,0,1,2>
+ 3357507596U, // <1,1,0,3>: Cost 4 vmrglw <0,3,1,0>, <0,0,1,3>
+ 2689745234U, // <1,1,0,4>: Cost 3 vsldoi8 <0,u,1,1>, <0,4,1,5>
+ 3357507922U, // <1,1,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,1,5>
+ 3294397647U, // <1,1,0,6>: Cost 4 vmrghw <1,0,1,2>, <1,6,1,7>
+ 3373433334U, // <1,1,0,7>: Cost 4 vmrglw <3,0,1,0>, <0,6,1,7>
+ 1616003730U, // <1,1,0,u>: Cost 2 vsldoi8 <0,u,1,1>, <0,u,1,1>
+ 1550221414U, // <1,1,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,1,1>: Cost 1 vspltisw1 LHS
+ 2287093910U, // <1,1,1,2>: Cost 3 vmrglw <0,u,1,1>, <3,0,1,2>
+ 2287092615U, // <1,1,1,3>: Cost 3 vmrglw <0,u,1,1>, <1,2,1,3>
+ 1550224694U, // <1,1,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 2287092050U, // <1,1,1,5>: Cost 3 vmrglw <0,u,1,1>, <0,4,1,5>
+ 2689746127U, // <1,1,1,6>: Cost 3 vsldoi8 <0,u,1,1>, <1,6,1,7>
+ 2659800138U, // <1,1,1,7>: Cost 3 vsldoi4 <7,1,1,1>, <7,1,1,1>
+ 269271142U, // <1,1,1,u>: Cost 1 vspltisw1 LHS
+ 2222113516U, // <1,1,2,0>: Cost 3 vmrghw <1,2,3,0>, <1,0,2,1>
+ 2756854663U, // <1,1,2,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,2,1,3>
+ 1148371862U, // <1,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689746598U, // <1,1,2,3>: Cost 3 vsldoi8 <0,u,1,1>, <2,3,0,1>
+ 2618002742U, // <1,1,2,4>: Cost 3 vsldoi4 <0,1,1,2>, RHS
+ 2299707730U, // <1,1,2,5>: Cost 3 vmrglw <3,0,1,2>, <0,4,1,5>
+ 2689746874U, // <1,1,2,6>: Cost 3 vsldoi8 <0,u,1,1>, <2,6,3,7>
+ 3361506511U, // <1,1,2,7>: Cost 4 vmrglw <1,0,1,2>, <1,6,1,7>
+ 1148371862U, // <1,1,2,u>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689747094U, // <1,1,3,0>: Cost 3 vsldoi8 <0,u,1,1>, <3,0,1,2>
+ 2691074278U, // <1,1,3,1>: Cost 3 vsldoi8 <1,1,1,1>, <3,1,1,1>
+ 3356870806U, // <1,1,3,2>: Cost 4 vmrglw <0,2,1,3>, <3,0,1,2>
+ 2283126958U, // <1,1,3,3>: Cost 3 vmrglw <0,2,1,3>, <0,2,1,3>
+ 2689747458U, // <1,1,3,4>: Cost 3 vsldoi8 <0,u,1,1>, <3,4,5,6>
+ 3356868946U, // <1,1,3,5>: Cost 4 vmrglw <0,2,1,3>, <0,4,1,5>
+ 3811265144U, // <1,1,3,6>: Cost 4 vsldoi8 <u,u,1,1>, <3,6,0,7>
+ 3362841807U, // <1,1,3,7>: Cost 4 vmrglw <1,2,1,3>, <1,6,1,7>
+ 2689747742U, // <1,1,3,u>: Cost 3 vsldoi8 <0,u,1,1>, <3,u,1,2>
+ 2623987814U, // <1,1,4,0>: Cost 3 vsldoi4 <1,1,1,4>, LHS
+ 2758181931U, // <1,1,4,1>: Cost 3 vsldoi12 <1,1,1,1>, <1,4,1,5>
+ 2223408022U, // <1,1,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 3697731734U, // <1,1,4,3>: Cost 4 vsldoi4 <1,1,1,4>, <3,0,1,2>
+ 2283798784U, // <1,1,4,4>: Cost 3 vmrglw <0,3,1,4>, <0,3,1,4>
+ 1616006454U, // <1,1,4,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 3297379535U, // <1,1,4,6>: Cost 4 vmrghw <1,4,5,6>, <1,6,1,7>
+ 3373466102U, // <1,1,4,7>: Cost 4 vmrglw <3,0,1,4>, <0,6,1,7>
+ 1616006697U, // <1,1,4,u>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2760762479U, // <1,1,5,0>: Cost 3 vsldoi12 <1,5,0,1>, <1,5,0,1>
+ 2284470282U, // <1,1,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,1>
+ 2284472470U, // <1,1,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,1,2>
+ 3358212270U, // <1,1,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,1,3>
+ 2284470285U, // <1,1,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,1,4>
+ 1210728786U, // <1,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2737524834U, // <1,1,5,6>: Cost 3 vsldoi8 <u,u,1,1>, <5,6,7,0>
+ 3360867535U, // <1,1,5,7>: Cost 4 vmrglw <0,u,1,5>, <1,6,1,7>
+ 1210728786U, // <1,1,5,u>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 3697746022U, // <1,1,6,0>: Cost 4 vsldoi4 <1,1,1,6>, LHS
+ 2756854991U, // <1,1,6,1>: Cost 3 vsldoi12 <0,u,1,1>, <1,6,1,7>
+ 2737525242U, // <1,1,6,2>: Cost 3 vsldoi8 <u,u,1,1>, <6,2,7,3>
+ 3839149281U, // <1,1,6,3>: Cost 4 vsldoi12 <2,3,0,1>, <1,6,3,7>
+ 3697749302U, // <1,1,6,4>: Cost 4 vsldoi4 <1,1,1,6>, RHS
+ 3356893522U, // <1,1,6,5>: Cost 4 vmrglw <0,2,1,6>, <0,4,1,5>
+ 2283151537U, // <1,1,6,6>: Cost 3 vmrglw <0,2,1,6>, <0,2,1,6>
+ 2791949566U, // <1,1,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <1,6,7,0>
+ 2792613127U, // <1,1,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <1,6,u,0>
+ 2737525754U, // <1,1,7,0>: Cost 3 vsldoi8 <u,u,1,1>, <7,0,1,2>
+ 2291786386U, // <1,1,7,1>: Cost 3 vmrglw <1,6,1,7>, <0,u,1,1>
+ 3365528292U, // <1,1,7,2>: Cost 4 vmrglw <1,6,1,7>, <1,0,1,2>
+ 3365528455U, // <1,1,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,1,3>
+ 2737526118U, // <1,1,7,4>: Cost 3 vsldoi8 <u,u,1,1>, <7,4,5,6>
+ 3365527890U, // <1,1,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,1,5>
+ 3365528377U, // <1,1,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,1,1,6>
+ 2291786959U, // <1,1,7,7>: Cost 3 vmrglw <1,6,1,7>, <1,6,1,7>
+ 2737526402U, // <1,1,7,u>: Cost 3 vsldoi8 <u,u,1,1>, <7,u,1,2>
+ 1550221414U, // <1,1,u,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,1,u,1>: Cost 1 vspltisw1 LHS
+ 1148371862U, // <1,1,u,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 2689750972U, // <1,1,u,3>: Cost 3 vsldoi8 <0,u,1,1>, <u,3,0,1>
+ 1550224694U, // <1,1,u,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1616009370U, // <1,1,u,5>: Cost 2 vsldoi8 <0,u,1,1>, RHS
+ 2689751248U, // <1,1,u,6>: Cost 3 vsldoi8 <0,u,1,1>, <u,6,3,7>
+ 2736863497U, // <1,1,u,7>: Cost 3 vsldoi8 <u,7,1,1>, <u,7,1,1>
+ 269271142U, // <1,1,u,u>: Cost 1 vspltisw1 LHS
+ 2702360576U, // <1,2,0,0>: Cost 3 vsldoi8 <3,0,1,2>, <0,0,0,0>
+ 1628618854U, // <1,2,0,1>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2685771949U, // <1,2,0,2>: Cost 3 vsldoi8 <0,2,1,2>, <0,2,1,2>
+ 2283765862U, // <1,2,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 2702360914U, // <1,2,0,4>: Cost 3 vsldoi8 <3,0,1,2>, <0,4,1,5>
+ 3788046813U, // <1,2,0,5>: Cost 4 vsldoi8 <5,0,1,2>, <0,5,u,0>
+ 2688426481U, // <1,2,0,6>: Cost 3 vsldoi8 <0,6,1,2>, <0,6,1,2>
+ 2726249024U, // <1,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1628619421U, // <1,2,0,u>: Cost 2 vsldoi8 <3,0,1,2>, LHS
+ 2690417380U, // <1,2,1,0>: Cost 3 vsldoi8 <1,0,1,2>, <1,0,1,2>
+ 2702361396U, // <1,2,1,1>: Cost 3 vsldoi8 <3,0,1,2>, <1,1,1,1>
+ 2287093352U, // <1,2,1,2>: Cost 3 vmrglw <0,u,1,1>, <2,2,2,2>
+ 1213349990U, // <1,2,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 3764159522U, // <1,2,1,4>: Cost 4 vsldoi8 <1,0,1,2>, <1,4,0,5>
+ 3295053672U, // <1,2,1,5>: Cost 4 vmrghw <1,1,1,1>, <2,5,3,6>
+ 2221311930U, // <1,2,1,6>: Cost 3 vmrghw <1,1,1,1>, <2,6,3,7>
+ 3799991593U, // <1,2,1,7>: Cost 4 vsldoi8 <7,0,1,2>, <1,7,2,7>
+ 1213349995U, // <1,2,1,u>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 2624045158U, // <1,2,2,0>: Cost 3 vsldoi4 <1,1,2,2>, LHS
+ 2702362144U, // <1,2,2,1>: Cost 3 vsldoi8 <3,0,1,2>, <2,1,3,2>
+ 2283120232U, // <1,2,2,2>: Cost 3 vmrglw <0,2,1,2>, <2,2,2,2>
+ 1225965670U, // <1,2,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2624048438U, // <1,2,2,4>: Cost 3 vsldoi4 <1,1,2,2>, RHS
+ 3356860763U, // <1,2,2,5>: Cost 4 vmrglw <0,2,1,2>, <0,4,2,5>
+ 2222114746U, // <1,2,2,6>: Cost 3 vmrghw <1,2,3,0>, <2,6,3,7>
+ 2299708632U, // <1,2,2,7>: Cost 3 vmrglw <3,0,1,2>, <1,6,2,7>
+ 1225965675U, // <1,2,2,u>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 470597734U, // <1,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544340276U, // <1,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544341096U, // <1,2,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544341916U, // <1,2,3,3>: Cost 2 vsldoi4 LHS, <3,3,3,3>
+ 470601014U, // <1,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592119300U, // <1,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592119802U, // <1,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592120314U, // <1,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470603566U, // <1,2,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708335471U, // <1,2,4,0>: Cost 3 vsldoi8 <4,0,1,2>, <4,0,1,2>
+ 3838043908U, // <1,2,4,1>: Cost 4 vsldoi12 <2,1,3,1>, <2,4,1,5>
+ 3357541992U, // <1,2,4,2>: Cost 4 vmrglw <0,3,1,4>, <2,2,2,2>
+ 2283798630U, // <1,2,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2726251728U, // <1,2,4,4>: Cost 3 vsldoi8 <7,0,1,2>, <4,4,4,4>
+ 1628622134U, // <1,2,4,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 3297077178U, // <1,2,4,6>: Cost 4 vmrghw <1,4,1,5>, <2,6,3,7>
+ 2726251976U, // <1,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1628622377U, // <1,2,4,u>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 2714308168U, // <1,2,5,0>: Cost 3 vsldoi8 <5,0,1,2>, <5,0,1,2>
+ 3297633827U, // <1,2,5,1>: Cost 4 vmrghw <1,5,0,1>, <2,1,3,5>
+ 2284471912U, // <1,2,5,2>: Cost 3 vmrglw <0,4,1,5>, <2,2,2,2>
+ 1210728550U, // <1,2,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 3776106420U, // <1,2,5,4>: Cost 4 vsldoi8 <3,0,1,2>, <5,4,5,6>
+ 2726252548U, // <1,2,5,5>: Cost 3 vsldoi8 <7,0,1,2>, <5,5,5,5>
+ 2726252642U, // <1,2,5,6>: Cost 3 vsldoi8 <7,0,1,2>, <5,6,7,0>
+ 3799994538U, // <1,2,5,7>: Cost 4 vsldoi8 <7,0,1,2>, <5,7,6,0>
+ 1210728555U, // <1,2,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720280865U, // <1,2,6,0>: Cost 3 vsldoi8 <6,0,1,2>, <6,0,1,2>
+ 2702365096U, // <1,2,6,1>: Cost 3 vsldoi8 <3,0,1,2>, <6,1,7,2>
+ 2726253050U, // <1,2,6,2>: Cost 3 vsldoi8 <7,0,1,2>, <6,2,7,3>
+ 2283151462U, // <1,2,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 3697823030U, // <1,2,6,4>: Cost 4 vsldoi4 <1,1,2,6>, RHS
+ 3298715497U, // <1,2,6,5>: Cost 4 vmrghw <1,6,5,7>, <2,5,3,7>
+ 2726253368U, // <1,2,6,6>: Cost 3 vsldoi8 <7,0,1,2>, <6,6,6,6>
+ 2724926296U, // <1,2,6,7>: Cost 3 vsldoi8 <6,7,1,2>, <6,7,1,2>
+ 2283151467U, // <1,2,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652511738U, // <1,2,7,0>: Cost 2 vsldoi8 <7,0,1,2>, <7,0,1,2>
+ 3371500916U, // <1,2,7,1>: Cost 4 vmrglw <2,6,1,7>, <1,u,2,1>
+ 3365529192U, // <1,2,7,2>: Cost 4 vmrglw <1,6,1,7>, <2,2,2,2>
+ 2291785830U, // <1,2,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2726253926U, // <1,2,7,4>: Cost 3 vsldoi8 <7,0,1,2>, <7,4,5,6>
+ 3788051845U, // <1,2,7,5>: Cost 4 vsldoi8 <5,0,1,2>, <7,5,0,1>
+ 3794023894U, // <1,2,7,6>: Cost 4 vsldoi8 <6,0,1,2>, <7,6,0,1>
+ 2726254119U, // <1,2,7,7>: Cost 3 vsldoi8 <7,0,1,2>, <7,7,0,1>
+ 1657820802U, // <1,2,7,u>: Cost 2 vsldoi8 <7,u,1,2>, <7,u,1,2>
+ 470638699U, // <1,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544381236U, // <1,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544382056U, // <1,2,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544382614U, // <1,2,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 470641974U, // <1,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1628625050U, // <1,2,u,5>: Cost 2 vsldoi8 <3,0,1,2>, RHS
+ 1592160762U, // <1,2,u,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592161274U, // <1,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 470644526U, // <1,2,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2769389708U, // <1,3,0,0>: Cost 3 vsldoi12 <3,0,0,1>, <3,0,0,1>
+ 2685780070U, // <1,3,0,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2685780142U, // <1,3,0,2>: Cost 3 vsldoi8 <0,2,1,3>, <0,2,1,3>
+ 2686443775U, // <1,3,0,3>: Cost 3 vsldoi8 <0,3,1,3>, <0,3,1,3>
+ 2769684656U, // <1,3,0,4>: Cost 3 vsldoi12 <3,0,4,1>, <3,0,4,1>
+ 3357507940U, // <1,3,0,5>: Cost 4 vmrglw <0,3,1,0>, <0,4,3,5>
+ 3759522294U, // <1,3,0,6>: Cost 4 vsldoi8 <0,2,1,3>, <0,6,1,7>
+ 3357509562U, // <1,3,0,7>: Cost 4 vmrglw <0,3,1,0>, <2,6,3,7>
+ 2685780637U, // <1,3,0,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2287092630U, // <1,3,1,0>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,0>
+ 2221312230U, // <1,3,1,1>: Cost 3 vmrghw <1,1,1,1>, <3,1,1,1>
+ 2691752839U, // <1,3,1,2>: Cost 3 vsldoi8 <1,2,1,3>, <1,2,1,3>
+ 2287093362U, // <1,3,1,3>: Cost 3 vmrglw <0,u,1,1>, <2,2,3,3>
+ 2287092634U, // <1,3,1,4>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,4>
+ 3360835107U, // <1,3,1,5>: Cost 4 vmrglw <0,u,1,1>, <2,1,3,5>
+ 3759523041U, // <1,3,1,6>: Cost 4 vsldoi8 <0,2,1,3>, <1,6,3,7>
+ 2287093690U, // <1,3,1,7>: Cost 3 vmrglw <0,u,1,1>, <2,6,3,7>
+ 2287092638U, // <1,3,1,u>: Cost 3 vmrglw <0,u,1,1>, <1,2,3,u>
+ 2222114966U, // <1,3,2,0>: Cost 3 vmrghw <1,2,3,0>, <3,0,1,2>
+ 2222115057U, // <1,3,2,1>: Cost 3 vmrghw <1,2,3,0>, <3,1,2,3>
+ 2630092320U, // <1,3,2,2>: Cost 3 vsldoi4 <2,1,3,2>, <2,1,3,2>
+ 2685781670U, // <1,3,2,3>: Cost 3 vsldoi8 <0,2,1,3>, <2,3,0,1>
+ 2222115330U, // <1,3,2,4>: Cost 3 vmrghw <1,2,3,0>, <3,4,5,6>
+ 3373449572U, // <1,3,2,5>: Cost 4 vmrglw <3,0,1,2>, <0,4,3,5>
+ 2222115448U, // <1,3,2,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2299709370U, // <1,3,2,7>: Cost 3 vmrglw <3,0,1,2>, <2,6,3,7>
+ 2222115614U, // <1,3,2,u>: Cost 3 vmrghw <1,2,3,0>, <3,u,1,2>
+ 2771380607U, // <1,3,3,0>: Cost 3 vsldoi12 <3,3,0,1>, <3,3,0,1>
+ 3356874468U, // <1,3,3,1>: Cost 4 vmrglw <0,2,1,3>, <u,0,3,1>
+ 3759524168U, // <1,3,3,2>: Cost 4 vsldoi8 <0,2,1,3>, <3,2,3,0>
+ 2283792796U, // <1,3,3,3>: Cost 3 vmrglw <0,3,1,3>, <3,3,3,3>
+ 3356869530U, // <1,3,3,4>: Cost 4 vmrglw <0,2,1,3>, <1,2,3,4>
+ 3721760428U, // <1,3,3,5>: Cost 4 vsldoi4 <5,1,3,3>, <5,1,3,3>
+ 3296496248U, // <1,3,3,6>: Cost 4 vmrghw <1,3,2,6>, <3,6,0,7>
+ 3356870586U, // <1,3,3,7>: Cost 4 vmrglw <0,2,1,3>, <2,6,3,7>
+ 2771970503U, // <1,3,3,u>: Cost 3 vsldoi12 <3,3,u,1>, <3,3,u,1>
+ 2772044240U, // <1,3,4,0>: Cost 3 vsldoi12 <3,4,0,1>, <3,4,0,1>
+ 3362186135U, // <1,3,4,1>: Cost 4 vmrglw <1,1,1,4>, <1,2,3,1>
+ 3297151280U, // <1,3,4,2>: Cost 4 vmrghw <1,4,2,5>, <3,2,0,3>
+ 3357542002U, // <1,3,4,3>: Cost 4 vmrglw <0,3,1,4>, <2,2,3,3>
+ 3357540626U, // <1,3,4,4>: Cost 4 vmrglw <0,3,1,4>, <0,3,3,4>
+ 2685783350U, // <1,3,4,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 3357546622U, // <1,3,4,6>: Cost 4 vmrglw <0,3,1,4>, <u,5,3,6>
+ 3357542330U, // <1,3,4,7>: Cost 4 vmrglw <0,3,1,4>, <2,6,3,7>
+ 2685783593U, // <1,3,4,u>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2284471190U, // <1,3,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,0>
+ 3358213015U, // <1,3,5,1>: Cost 4 vmrglw <0,4,1,5>, <1,2,3,1>
+ 2630116899U, // <1,3,5,2>: Cost 3 vsldoi4 <2,1,3,5>, <2,1,3,5>
+ 2284471922U, // <1,3,5,3>: Cost 3 vmrglw <0,4,1,5>, <2,2,3,3>
+ 2284471194U, // <1,3,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,4>
+ 2284471843U, // <1,3,5,5>: Cost 3 vmrglw <0,4,1,5>, <2,1,3,5>
+ 3358218366U, // <1,3,5,6>: Cost 4 vmrglw <0,4,1,5>, <u,5,3,6>
+ 2284472250U, // <1,3,5,7>: Cost 3 vmrglw <0,4,1,5>, <2,6,3,7>
+ 2284471198U, // <1,3,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,3,u>
+ 2224752790U, // <1,3,6,0>: Cost 3 vmrghw <1,6,2,7>, <3,0,1,2>
+ 3832736385U, // <1,3,6,1>: Cost 4 vsldoi12 <1,2,3,1>, <3,6,1,7>
+ 3703866916U, // <1,3,6,2>: Cost 4 vsldoi4 <2,1,3,6>, <2,1,3,6>
+ 3356894834U, // <1,3,6,3>: Cost 4 vmrglw <0,2,1,6>, <2,2,3,3>
+ 3356894106U, // <1,3,6,4>: Cost 4 vmrglw <0,2,1,6>, <1,2,3,4>
+ 3356894755U, // <1,3,6,5>: Cost 5 vmrglw <0,2,1,6>, <2,1,3,5>
+ 3356899130U, // <1,3,6,6>: Cost 4 vmrglw <0,2,1,6>, <u,1,3,6>
+ 2283153338U, // <1,3,6,7>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2283153338U, // <1,3,6,u>: Cost 3 vmrglw <0,2,1,6>, <2,6,3,7>
+ 2774035139U, // <1,3,7,0>: Cost 3 vsldoi12 <3,7,0,1>, <3,7,0,1>
+ 3703874767U, // <1,3,7,1>: Cost 4 vsldoi4 <2,1,3,7>, <1,6,1,7>
+ 3703875109U, // <1,3,7,2>: Cost 4 vsldoi4 <2,1,3,7>, <2,1,3,7>
+ 3365529202U, // <1,3,7,3>: Cost 4 vmrglw <1,6,1,7>, <2,2,3,3>
+ 3365528474U, // <1,3,7,4>: Cost 4 vmrglw <1,6,1,7>, <1,2,3,4>
+ 3789387159U, // <1,3,7,5>: Cost 4 vsldoi8 <5,2,1,3>, <7,5,2,1>
+ 3865692927U, // <1,3,7,6>: Cost 4 vsldoi12 <6,7,0,1>, <3,7,6,7>
+ 3363538874U, // <1,3,7,7>: Cost 4 vmrglw <1,3,1,7>, <2,6,3,7>
+ 2774625035U, // <1,3,7,u>: Cost 3 vsldoi12 <3,7,u,1>, <3,7,u,1>
+ 2284495766U, // <1,3,u,0>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,0>
+ 2685785902U, // <1,3,u,1>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2630141478U, // <1,3,u,2>: Cost 3 vsldoi4 <2,1,3,u>, <2,1,3,u>
+ 2283169880U, // <1,3,u,3>: Cost 3 vmrglw <0,2,1,u>, <2,u,3,3>
+ 2284495770U, // <1,3,u,4>: Cost 3 vmrglw <0,4,1,u>, <1,2,3,4>
+ 2685786266U, // <1,3,u,5>: Cost 3 vsldoi8 <0,2,1,3>, RHS
+ 2222115448U, // <1,3,u,6>: Cost 3 vmrghw <1,2,3,0>, <3,6,0,7>
+ 2284496826U, // <1,3,u,7>: Cost 3 vmrglw <0,4,1,u>, <2,6,3,7>
+ 2685786469U, // <1,3,u,u>: Cost 3 vsldoi8 <0,2,1,3>, LHS
+ 2684461069U, // <1,4,0,0>: Cost 3 vsldoi8 <0,0,1,4>, <0,0,1,4>
+ 2686451814U, // <1,4,0,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 3759530159U, // <1,4,0,2>: Cost 4 vsldoi8 <0,2,1,4>, <0,2,1,4>
+ 2686451968U, // <1,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2684461394U, // <1,4,0,4>: Cost 3 vsldoi8 <0,0,1,4>, <0,4,1,5>
+ 1701989266U, // <1,4,0,5>: Cost 2 vsldoi12 <4,0,5,1>, <4,0,5,1>
+ 3776119286U, // <1,4,0,6>: Cost 4 vsldoi8 <3,0,1,4>, <0,6,1,7>
+ 2689106500U, // <1,4,0,7>: Cost 3 vsldoi8 <0,7,1,4>, <0,7,1,4>
+ 1702210477U, // <1,4,0,u>: Cost 2 vsldoi12 <4,0,u,1>, <4,0,u,1>
+ 2221312914U, // <1,4,1,0>: Cost 3 vmrghw <1,1,1,1>, <4,0,5,1>
+ 2691097399U, // <1,4,1,1>: Cost 3 vsldoi8 <1,1,1,4>, <1,1,1,4>
+ 3760194454U, // <1,4,1,2>: Cost 4 vsldoi8 <0,3,1,4>, <1,2,3,0>
+ 3766166489U, // <1,4,1,3>: Cost 4 vsldoi8 <1,3,1,4>, <1,3,1,4>
+ 2334870736U, // <1,4,1,4>: Cost 3 vmrglw <u,u,1,1>, <4,4,4,4>
+ 1147571510U, // <1,4,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 3760194794U, // <1,4,1,6>: Cost 4 vsldoi8 <0,3,1,4>, <1,6,4,7>
+ 3867315188U, // <1,4,1,7>: Cost 4 vsldoi12 <7,0,4,1>, <4,1,7,0>
+ 1147571753U, // <1,4,1,u>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2222115730U, // <1,4,2,0>: Cost 3 vmrghw <1,2,3,0>, <4,0,5,1>
+ 2222115812U, // <1,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 3760195176U, // <1,4,2,2>: Cost 4 vsldoi8 <0,3,1,4>, <2,2,2,2>
+ 2702378662U, // <1,4,2,3>: Cost 3 vsldoi8 <3,0,1,4>, <2,3,0,1>
+ 2323598544U, // <1,4,2,4>: Cost 3 vmrglw <7,0,1,2>, <4,4,4,4>
+ 1148374326U, // <1,4,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 3760195514U, // <1,4,2,6>: Cost 4 vsldoi8 <0,3,1,4>, <2,6,3,7>
+ 3373451932U, // <1,4,2,7>: Cost 4 vmrglw <3,0,1,2>, <3,6,4,7>
+ 1148374569U, // <1,4,2,u>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2702379160U, // <1,4,3,0>: Cost 3 vsldoi8 <3,0,1,4>, <3,0,1,4>
+ 3760195840U, // <1,4,3,1>: Cost 4 vsldoi8 <0,3,1,4>, <3,1,4,0>
+ 3776121160U, // <1,4,3,2>: Cost 4 vsldoi8 <3,0,1,4>, <3,2,3,0>
+ 3760195996U, // <1,4,3,3>: Cost 4 vsldoi8 <0,3,1,4>, <3,3,3,3>
+ 2686454274U, // <1,4,3,4>: Cost 3 vsldoi8 <0,3,1,4>, <3,4,5,6>
+ 3356870350U, // <1,4,3,5>: Cost 4 vmrglw <0,2,1,3>, <2,3,4,5>
+ 3800009392U, // <1,4,3,6>: Cost 4 vsldoi8 <7,0,1,4>, <3,6,7,0>
+ 3366824604U, // <1,4,3,7>: Cost 5 vmrglw <1,u,1,3>, <3,6,4,7>
+ 2707688224U, // <1,4,3,u>: Cost 3 vsldoi8 <3,u,1,4>, <3,u,1,4>
+ 2775731368U, // <1,4,4,0>: Cost 3 vsldoi12 <4,0,5,1>, <4,4,0,0>
+ 3830820018U, // <1,4,4,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,4,1,1>
+ 3691980454U, // <1,4,4,2>: Cost 4 vsldoi4 <0,1,4,4>, <2,3,0,1>
+ 3357541282U, // <1,4,4,3>: Cost 4 vmrglw <0,3,1,4>, <1,2,4,3>
+ 2781039824U, // <1,4,4,4>: Cost 3 vsldoi12 <4,u,5,1>, <4,4,4,4>
+ 2686455094U, // <1,4,4,5>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 3357541528U, // <1,4,4,6>: Cost 4 vmrglw <0,3,1,4>, <1,5,4,6>
+ 3810627020U, // <1,4,4,7>: Cost 4 vsldoi8 <u,7,1,4>, <4,7,5,4>
+ 2686455337U, // <1,4,4,u>: Cost 3 vsldoi8 <0,3,1,4>, RHS
+ 2624217190U, // <1,4,5,0>: Cost 3 vsldoi4 <1,1,4,5>, LHS
+ 2284470309U, // <1,4,5,1>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,1>
+ 2618246822U, // <1,4,5,2>: Cost 3 vsldoi4 <0,1,4,5>, <2,3,0,1>
+ 3358212297U, // <1,4,5,3>: Cost 4 vmrglw <0,4,1,5>, <0,2,4,3>
+ 2284470312U, // <1,4,5,4>: Cost 3 vmrglw <0,4,1,5>, <0,0,4,4>
+ 2284470637U, // <1,4,5,5>: Cost 3 vmrglw <0,4,1,5>, <0,4,4,5>
+ 1683115318U, // <1,4,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3721851898U, // <1,4,5,7>: Cost 4 vsldoi4 <5,1,4,5>, <7,0,1,2>
+ 1683115336U, // <1,4,5,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 3794039075U, // <1,4,6,0>: Cost 4 vsldoi8 <6,0,1,4>, <6,0,1,4>
+ 3830820186U, // <1,4,6,1>: Cost 4 vsldoi12 <0,u,4,1>, <4,6,1,7>
+ 3800011258U, // <1,4,6,2>: Cost 4 vsldoi8 <7,0,1,4>, <6,2,7,3>
+ 3807973938U, // <1,4,6,3>: Cost 4 vsldoi8 <u,3,1,4>, <6,3,4,5>
+ 3298716880U, // <1,4,6,4>: Cost 4 vmrghw <1,6,5,7>, <4,4,4,4>
+ 2224680246U, // <1,4,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 3800011576U, // <1,4,6,6>: Cost 4 vsldoi8 <7,0,1,4>, <6,6,6,6>
+ 2726269774U, // <1,4,6,7>: Cost 3 vsldoi8 <7,0,1,4>, <6,7,0,1>
+ 2224680489U, // <1,4,6,u>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726269948U, // <1,4,7,0>: Cost 3 vsldoi8 <7,0,1,4>, <7,0,1,4>
+ 3383444141U, // <1,4,7,1>: Cost 4 vmrglw <4,6,1,7>, <0,u,4,1>
+ 3805983961U, // <1,4,7,2>: Cost 4 vsldoi8 <u,0,1,4>, <7,2,u,0>
+ 3807974667U, // <1,4,7,3>: Cost 4 vsldoi8 <u,3,1,4>, <7,3,4,5>
+ 2736887142U, // <1,4,7,4>: Cost 3 vsldoi8 <u,7,1,4>, <7,4,5,6>
+ 3365528403U, // <1,4,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,1,4,5>
+ 3800012308U, // <1,4,7,6>: Cost 4 vsldoi8 <7,0,1,4>, <7,6,7,0>
+ 3800012396U, // <1,4,7,7>: Cost 4 vsldoi8 <7,0,1,4>, <7,7,7,7>
+ 2731579012U, // <1,4,7,u>: Cost 3 vsldoi8 <7,u,1,4>, <7,u,1,4>
+ 2624241766U, // <1,4,u,0>: Cost 3 vsldoi4 <1,1,4,u>, LHS
+ 2686457646U, // <1,4,u,1>: Cost 3 vsldoi8 <0,3,1,4>, LHS
+ 2618271398U, // <1,4,u,2>: Cost 3 vsldoi4 <0,1,4,u>, <2,3,0,1>
+ 2734233544U, // <1,4,u,3>: Cost 3 vsldoi8 <u,3,1,4>, <u,3,1,4>
+ 2689775679U, // <1,4,u,4>: Cost 3 vsldoi8 <0,u,1,4>, <u,4,5,6>
+ 1152355638U, // <1,4,u,5>: Cost 2 vmrghw <1,u,3,0>, RHS
+ 1683115561U, // <1,4,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2736888076U, // <1,4,u,7>: Cost 3 vsldoi8 <u,7,1,4>, <u,7,1,4>
+ 1683115579U, // <1,4,u,u>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 2687123456U, // <1,5,0,0>: Cost 3 vsldoi8 <0,4,1,5>, <0,0,0,0>
+ 1613381734U, // <1,5,0,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759538352U, // <1,5,0,2>: Cost 4 vsldoi8 <0,2,1,5>, <0,2,1,5>
+ 3760865532U, // <1,5,0,3>: Cost 4 vsldoi8 <0,4,1,5>, <0,3,1,0>
+ 1613381970U, // <1,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2687787427U, // <1,5,0,5>: Cost 3 vsldoi8 <0,5,1,5>, <0,5,1,5>
+ 2781777524U, // <1,5,0,6>: Cost 3 vsldoi12 <5,0,6,1>, <5,0,6,1>
+ 3733828717U, // <1,5,0,7>: Cost 4 vsldoi4 <7,1,5,0>, <7,1,5,0>
+ 1613382301U, // <1,5,0,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2781040271U, // <1,5,1,0>: Cost 3 vsldoi12 <4,u,5,1>, <5,1,0,1>
+ 2687124276U, // <1,5,1,1>: Cost 3 vsldoi8 <0,4,1,5>, <1,1,1,1>
+ 2687124374U, // <1,5,1,2>: Cost 3 vsldoi8 <0,4,1,5>, <1,2,3,0>
+ 3760866297U, // <1,5,1,3>: Cost 4 vsldoi8 <0,4,1,5>, <1,3,5,0>
+ 2693096491U, // <1,5,1,4>: Cost 3 vsldoi8 <1,4,1,5>, <1,4,1,5>
+ 2687124591U, // <1,5,1,5>: Cost 3 vsldoi8 <0,4,1,5>, <1,5,0,1>
+ 2687124723U, // <1,5,1,6>: Cost 3 vsldoi8 <0,4,1,5>, <1,6,5,7>
+ 3360834803U, // <1,5,1,7>: Cost 4 vmrglw <0,u,1,1>, <1,6,5,7>
+ 2687124860U, // <1,5,1,u>: Cost 3 vsldoi8 <0,4,1,5>, <1,u,3,0>
+ 2323598792U, // <1,5,2,0>: Cost 3 vmrglw <7,0,1,2>, <4,7,5,0>
+ 2687125027U, // <1,5,2,1>: Cost 3 vsldoi8 <0,4,1,5>, <2,1,3,5>
+ 2687125096U, // <1,5,2,2>: Cost 3 vsldoi8 <0,4,1,5>, <2,2,2,2>
+ 2687125158U, // <1,5,2,3>: Cost 3 vsldoi8 <0,4,1,5>, <2,3,0,1>
+ 2642185188U, // <1,5,2,4>: Cost 3 vsldoi4 <4,1,5,2>, <4,1,5,2>
+ 2323598554U, // <1,5,2,5>: Cost 3 vmrglw <7,0,1,2>, <4,4,5,5>
+ 2687125434U, // <1,5,2,6>: Cost 3 vsldoi8 <0,4,1,5>, <2,6,3,7>
+ 3373450483U, // <1,5,2,7>: Cost 4 vmrglw <3,0,1,2>, <1,6,5,7>
+ 2687125563U, // <1,5,2,u>: Cost 3 vsldoi8 <0,4,1,5>, <2,u,0,1>
+ 2687125654U, // <1,5,3,0>: Cost 3 vsldoi8 <0,4,1,5>, <3,0,1,2>
+ 2312990234U, // <1,5,3,1>: Cost 3 vmrglw <5,2,1,3>, <4,u,5,1>
+ 3760867649U, // <1,5,3,2>: Cost 4 vsldoi8 <0,4,1,5>, <3,2,2,2>
+ 2687125916U, // <1,5,3,3>: Cost 3 vsldoi8 <0,4,1,5>, <3,3,3,3>
+ 2687126018U, // <1,5,3,4>: Cost 3 vsldoi8 <0,4,1,5>, <3,4,5,6>
+ 3386731738U, // <1,5,3,5>: Cost 4 vmrglw <5,2,1,3>, <4,4,5,5>
+ 3356871170U, // <1,5,3,6>: Cost 4 vmrglw <0,2,1,3>, <3,4,5,6>
+ 3808643779U, // <1,5,3,7>: Cost 4 vsldoi8 <u,4,1,5>, <3,7,0,1>
+ 2687126302U, // <1,5,3,u>: Cost 3 vsldoi8 <0,4,1,5>, <3,u,1,2>
+ 2642198630U, // <1,5,4,0>: Cost 3 vsldoi4 <4,1,5,4>, LHS
+ 2687126498U, // <1,5,4,1>: Cost 3 vsldoi8 <0,4,1,5>, <4,1,5,0>
+ 3715941923U, // <1,5,4,2>: Cost 4 vsldoi4 <4,1,5,4>, <2,1,3,5>
+ 3709970701U, // <1,5,4,3>: Cost 4 vsldoi4 <3,1,5,4>, <3,1,5,4>
+ 2687126736U, // <1,5,4,4>: Cost 3 vsldoi8 <0,4,1,5>, <4,4,4,4>
+ 1613385014U, // <1,5,4,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2283801090U, // <1,5,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 3733861489U, // <1,5,4,7>: Cost 4 vsldoi4 <7,1,5,4>, <7,1,5,4>
+ 1613385257U, // <1,5,4,u>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2624290918U, // <1,5,5,0>: Cost 3 vsldoi4 <1,1,5,5>, LHS
+ 2624291676U, // <1,5,5,1>: Cost 3 vsldoi4 <1,1,5,5>, <1,1,5,5>
+ 3698034211U, // <1,5,5,2>: Cost 4 vsldoi4 <1,1,5,5>, <2,1,3,5>
+ 2284471211U, // <1,5,5,3>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,3>
+ 2624294198U, // <1,5,5,4>: Cost 3 vsldoi4 <1,1,5,5>, RHS
+ 2284471132U, // <1,5,5,5>: Cost 3 vmrglw <0,4,1,5>, <1,1,5,5>
+ 2284472834U, // <1,5,5,6>: Cost 3 vmrglw <0,4,1,5>, <3,4,5,6>
+ 2284471539U, // <1,5,5,7>: Cost 3 vmrglw <0,4,1,5>, <1,6,5,7>
+ 2284471216U, // <1,5,5,u>: Cost 3 vmrglw <0,4,1,5>, <1,2,5,u>
+ 2785316900U, // <1,5,6,0>: Cost 3 vsldoi12 <5,6,0,1>, <5,6,0,1>
+ 2781040691U, // <1,5,6,1>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,1,7>
+ 2734903802U, // <1,5,6,2>: Cost 3 vsldoi8 <u,4,1,5>, <6,2,7,3>
+ 3848736834U, // <1,5,6,3>: Cost 4 vsldoi12 <3,u,4,1>, <5,6,3,4>
+ 3298717620U, // <1,5,6,4>: Cost 4 vmrghw <1,6,5,7>, <5,4,5,6>
+ 3298717700U, // <1,5,6,5>: Cost 4 vmrghw <1,6,5,7>, <5,5,5,5>
+ 2734904120U, // <1,5,6,6>: Cost 3 vsldoi8 <u,4,1,5>, <6,6,6,6>
+ 2781040738U, // <1,5,6,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,7,0>
+ 2781040747U, // <1,5,6,u>: Cost 3 vsldoi12 <4,u,5,1>, <5,6,u,0>
+ 2734904314U, // <1,5,7,0>: Cost 3 vsldoi8 <u,4,1,5>, <7,0,1,2>
+ 2315677210U, // <1,5,7,1>: Cost 3 vmrglw <5,6,1,7>, <4,u,5,1>
+ 3808646292U, // <1,5,7,2>: Cost 4 vsldoi8 <u,4,1,5>, <7,2,0,3>
+ 3808646371U, // <1,5,7,3>: Cost 4 vsldoi8 <u,4,1,5>, <7,3,0,1>
+ 2734904678U, // <1,5,7,4>: Cost 3 vsldoi8 <u,4,1,5>, <7,4,5,6>
+ 3389418714U, // <1,5,7,5>: Cost 4 vmrglw <5,6,1,7>, <4,4,5,5>
+ 3365528656U, // <1,5,7,6>: Cost 4 vmrglw <1,6,1,7>, <1,4,5,6>
+ 2734904940U, // <1,5,7,7>: Cost 3 vsldoi8 <u,4,1,5>, <7,7,7,7>
+ 2734904962U, // <1,5,7,u>: Cost 3 vsldoi8 <u,4,1,5>, <7,u,1,2>
+ 2687129299U, // <1,5,u,0>: Cost 3 vsldoi8 <0,4,1,5>, <u,0,1,2>
+ 1613387566U, // <1,5,u,1>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 2687129480U, // <1,5,u,2>: Cost 3 vsldoi8 <0,4,1,5>, <u,2,3,3>
+ 2687129532U, // <1,5,u,3>: Cost 3 vsldoi8 <0,4,1,5>, <u,3,0,1>
+ 1661163546U, // <1,5,u,4>: Cost 2 vsldoi8 <u,4,1,5>, <u,4,1,5>
+ 1613387930U, // <1,5,u,5>: Cost 2 vsldoi8 <0,4,1,5>, RHS
+ 2687129808U, // <1,5,u,6>: Cost 3 vsldoi8 <0,4,1,5>, <u,6,3,7>
+ 2781040900U, // <1,5,u,7>: Cost 3 vsldoi12 <4,u,5,1>, <5,u,7,0>
+ 1613388133U, // <1,5,u,u>: Cost 2 vsldoi8 <0,4,1,5>, LHS
+ 3759546368U, // <1,6,0,0>: Cost 4 vsldoi8 <0,2,1,6>, <0,0,0,0>
+ 2685804646U, // <1,6,0,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2685804721U, // <1,6,0,2>: Cost 3 vsldoi8 <0,2,1,6>, <0,2,1,6>
+ 3861270834U, // <1,6,0,3>: Cost 4 vsldoi12 <6,0,3,1>, <6,0,3,1>
+ 3759546706U, // <1,6,0,4>: Cost 4 vsldoi8 <0,2,1,6>, <0,4,1,5>
+ 2687795620U, // <1,6,0,5>: Cost 3 vsldoi8 <0,5,1,6>, <0,5,1,6>
+ 2688459253U, // <1,6,0,6>: Cost 3 vsldoi8 <0,6,1,6>, <0,6,1,6>
+ 2283769142U, // <1,6,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 2685805213U, // <1,6,0,u>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 3698073702U, // <1,6,1,0>: Cost 4 vsldoi4 <1,1,6,1>, LHS
+ 3759547188U, // <1,6,1,1>: Cost 4 vsldoi8 <0,2,1,6>, <1,1,1,1>
+ 2221314554U, // <1,6,1,2>: Cost 3 vmrghw <1,1,1,1>, <6,2,7,3>
+ 3759547401U, // <1,6,1,3>: Cost 4 vsldoi8 <0,2,1,6>, <1,3,6,7>
+ 3698076982U, // <1,6,1,4>: Cost 4 vsldoi4 <1,1,6,1>, RHS
+ 3767510141U, // <1,6,1,5>: Cost 4 vsldoi8 <1,5,1,6>, <1,5,1,6>
+ 2334872376U, // <1,6,1,6>: Cost 3 vmrglw <u,u,1,1>, <6,6,6,6>
+ 1213353270U, // <1,6,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 1213353271U, // <1,6,1,u>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 3704053862U, // <1,6,2,0>: Cost 4 vsldoi4 <2,1,6,2>, LHS
+ 3759547961U, // <1,6,2,1>: Cost 4 vsldoi8 <0,2,1,6>, <2,1,6,0>
+ 2222117370U, // <1,6,2,2>: Cost 3 vmrghw <1,2,3,0>, <6,2,7,3>
+ 3759548070U, // <1,6,2,3>: Cost 4 vsldoi8 <0,2,1,6>, <2,3,0,1>
+ 3704057142U, // <1,6,2,4>: Cost 4 vsldoi4 <2,1,6,2>, RHS
+ 3373451057U, // <1,6,2,5>: Cost 4 vmrglw <3,0,1,2>, <2,4,6,5>
+ 2685806522U, // <1,6,2,6>: Cost 3 vsldoi8 <0,2,1,6>, <2,6,3,7>
+ 1225968950U, // <1,6,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1225968951U, // <1,6,2,u>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 3759548566U, // <1,6,3,0>: Cost 4 vsldoi8 <0,2,1,6>, <3,0,1,2>
+ 3842912793U, // <1,6,3,1>: Cost 4 vsldoi12 <2,u,6,1>, <6,3,1,7>
+ 3759548774U, // <1,6,3,2>: Cost 4 vsldoi8 <0,2,1,6>, <3,2,6,3>
+ 3759548828U, // <1,6,3,3>: Cost 4 vsldoi8 <0,2,1,6>, <3,3,3,3>
+ 3759548930U, // <1,6,3,4>: Cost 4 vsldoi8 <0,2,1,6>, <3,4,5,6>
+ 3809315421U, // <1,6,3,5>: Cost 4 vsldoi8 <u,5,1,6>, <3,5,6,7>
+ 3386733368U, // <1,6,3,6>: Cost 4 vmrglw <5,2,1,3>, <6,6,6,6>
+ 2283130166U, // <1,6,3,7>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 2283130167U, // <1,6,3,u>: Cost 3 vmrglw <0,2,1,3>, RHS
+ 3704070246U, // <1,6,4,0>: Cost 4 vsldoi4 <2,1,6,4>, LHS
+ 3862229608U, // <1,6,4,1>: Cost 4 vsldoi12 <6,1,7,1>, <6,4,1,5>
+ 3704071741U, // <1,6,4,2>: Cost 4 vsldoi4 <2,1,6,4>, <2,1,6,4>
+ 3721988610U, // <1,6,4,3>: Cost 4 vsldoi4 <5,1,6,4>, <3,4,5,6>
+ 3704073526U, // <1,6,4,4>: Cost 4 vsldoi4 <2,1,6,4>, RHS
+ 2685807926U, // <1,6,4,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3865621141U, // <1,6,4,6>: Cost 4 vsldoi12 <6,6,u,1>, <6,4,6,5>
+ 2283801910U, // <1,6,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 2685808169U, // <1,6,4,u>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 3710050406U, // <1,6,5,0>: Cost 4 vsldoi4 <3,1,6,5>, LHS
+ 3710051571U, // <1,6,5,1>: Cost 4 vsldoi4 <3,1,6,5>, <1,6,5,7>
+ 3405989597U, // <1,6,5,2>: Cost 4 vmrglw <u,4,1,5>, <2,3,6,2>
+ 3358214502U, // <1,6,5,3>: Cost 4 vmrglw <0,4,1,5>, <3,2,6,3>
+ 3710053686U, // <1,6,5,4>: Cost 4 vsldoi4 <3,1,6,5>, RHS
+ 3721998025U, // <1,6,5,5>: Cost 4 vsldoi4 <5,1,6,5>, <5,1,6,5>
+ 2332250936U, // <1,6,5,6>: Cost 3 vmrglw <u,4,1,5>, <6,6,6,6>
+ 1210731830U, // <1,6,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210731831U, // <1,6,5,u>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 2791289597U, // <1,6,6,0>: Cost 3 vsldoi12 <6,6,0,1>, <6,6,0,1>
+ 3698115430U, // <1,6,6,1>: Cost 4 vsldoi4 <1,1,6,6>, <1,1,6,6>
+ 3698116538U, // <1,6,6,2>: Cost 4 vsldoi4 <1,1,6,6>, <2,6,3,7>
+ 3356894132U, // <1,6,6,3>: Cost 4 vmrglw <0,2,1,6>, <1,2,6,3>
+ 3698117942U, // <1,6,6,4>: Cost 4 vsldoi4 <1,1,6,6>, RHS
+ 3722006218U, // <1,6,6,5>: Cost 4 vsldoi4 <5,1,6,6>, <5,1,6,6>
+ 2781041464U, // <1,6,6,6>: Cost 3 vsldoi12 <4,u,5,1>, <6,6,6,6>
+ 2283154742U, // <1,6,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283154743U, // <1,6,6,u>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 1718211406U, // <1,6,7,0>: Cost 2 vsldoi12 <6,7,0,1>, <6,7,0,1>
+ 2792026967U, // <1,6,7,1>: Cost 3 vsldoi12 <6,7,1,1>, <6,7,1,1>
+ 2765411170U, // <1,6,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <6,7,2,3>
+ 3854783336U, // <1,6,7,3>: Cost 4 vsldoi12 <4,u,5,1>, <6,7,3,0>
+ 2781041526U, // <1,6,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,7,4,5>
+ 3365528664U, // <1,6,7,5>: Cost 4 vmrglw <1,6,1,7>, <1,4,6,5>
+ 2791953290U, // <1,6,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <6,7,6,7>
+ 2291789110U, // <1,6,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1718801302U, // <1,6,7,u>: Cost 2 vsldoi12 <6,7,u,1>, <6,7,u,1>
+ 1718875039U, // <1,6,u,0>: Cost 2 vsldoi12 <6,u,0,1>, <6,u,0,1>
+ 2685810478U, // <1,6,u,1>: Cost 3 vsldoi8 <0,2,1,6>, LHS
+ 2792764337U, // <1,6,u,2>: Cost 3 vsldoi12 <6,u,2,1>, <6,u,2,1>
+ 3759552444U, // <1,6,u,3>: Cost 4 vsldoi8 <0,2,1,6>, <u,3,0,1>
+ 2781041607U, // <1,6,u,4>: Cost 3 vsldoi12 <4,u,5,1>, <6,u,4,5>
+ 2685810842U, // <1,6,u,5>: Cost 3 vsldoi8 <0,2,1,6>, RHS
+ 2689792208U, // <1,6,u,6>: Cost 3 vsldoi8 <0,u,1,6>, <u,6,3,7>
+ 1210756406U, // <1,6,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 1210756407U, // <1,6,u,u>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 2793280496U, // <1,7,0,0>: Cost 3 vsldoi12 <7,0,0,1>, <7,0,0,1>
+ 2694439014U, // <1,7,0,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 3393343912U, // <1,7,0,2>: Cost 4 vmrglw <6,3,1,0>, <6,1,7,2>
+ 3397325306U, // <1,7,0,3>: Cost 4 vmrglw <7,0,1,0>, <6,2,7,3>
+ 2793575444U, // <1,7,0,4>: Cost 3 vsldoi12 <7,0,4,1>, <7,0,4,1>
+ 3722030797U, // <1,7,0,5>: Cost 4 vsldoi4 <5,1,7,0>, <5,1,7,0>
+ 2688467446U, // <1,7,0,6>: Cost 3 vsldoi8 <0,6,1,7>, <0,6,1,7>
+ 2689131079U, // <1,7,0,7>: Cost 3 vsldoi8 <0,7,1,7>, <0,7,1,7>
+ 2694439570U, // <1,7,0,u>: Cost 3 vsldoi8 <1,6,1,7>, <0,u,1,1>
+ 2654265354U, // <1,7,1,0>: Cost 3 vsldoi4 <6,1,7,1>, <0,0,1,1>
+ 2794017866U, // <1,7,1,1>: Cost 3 vsldoi12 <7,1,1,1>, <7,1,1,1>
+ 3768181639U, // <1,7,1,2>: Cost 4 vsldoi8 <1,6,1,7>, <1,2,1,3>
+ 2334872058U, // <1,7,1,3>: Cost 3 vmrglw <u,u,1,1>, <6,2,7,3>
+ 2654268726U, // <1,7,1,4>: Cost 3 vsldoi4 <6,1,7,1>, RHS
+ 3792069797U, // <1,7,1,5>: Cost 4 vsldoi8 <5,6,1,7>, <1,5,6,1>
+ 2694440143U, // <1,7,1,6>: Cost 3 vsldoi8 <1,6,1,7>, <1,6,1,7>
+ 2334872386U, // <1,7,1,7>: Cost 3 vmrglw <u,u,1,1>, <6,6,7,7>
+ 2695767409U, // <1,7,1,u>: Cost 3 vsldoi8 <1,u,1,7>, <1,u,1,7>
+ 2654273638U, // <1,7,2,0>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2222117973U, // <1,7,2,1>: Cost 3 vmrghw <1,2,3,0>, <7,1,2,3>
+ 2299711912U, // <1,7,2,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2654275734U, // <1,7,2,3>: Cost 3 vsldoi4 <6,1,7,2>, <3,0,1,2>
+ 2654276918U, // <1,7,2,4>: Cost 3 vsldoi4 <6,1,7,2>, RHS
+ 3385397675U, // <1,7,2,5>: Cost 4 vmrglw <5,0,1,2>, <6,1,7,5>
+ 2654278056U, // <1,7,2,6>: Cost 3 vsldoi4 <6,1,7,2>, <6,1,7,2>
+ 2323599627U, // <1,7,2,7>: Cost 3 vmrglw <7,0,1,2>, <5,u,7,7>
+ 2654279470U, // <1,7,2,u>: Cost 3 vsldoi4 <6,1,7,2>, LHS
+ 2795271395U, // <1,7,3,0>: Cost 3 vsldoi12 <7,3,0,1>, <7,3,0,1>
+ 3768183059U, // <1,7,3,1>: Cost 4 vsldoi8 <1,6,1,7>, <3,1,6,1>
+ 3728025254U, // <1,7,3,2>: Cost 4 vsldoi4 <6,1,7,3>, <2,3,0,1>
+ 3768183196U, // <1,7,3,3>: Cost 4 vsldoi8 <1,6,1,7>, <3,3,3,3>
+ 3768183298U, // <1,7,3,4>: Cost 4 vsldoi8 <1,6,1,7>, <3,4,5,6>
+ 3792071255U, // <1,7,3,5>: Cost 4 vsldoi8 <5,6,1,7>, <3,5,6,1>
+ 3780127361U, // <1,7,3,6>: Cost 4 vsldoi8 <3,6,1,7>, <3,6,1,7>
+ 3847779617U, // <1,7,3,7>: Cost 4 vsldoi12 <3,7,0,1>, <7,3,7,0>
+ 2795861291U, // <1,7,3,u>: Cost 3 vsldoi12 <7,3,u,1>, <7,3,u,1>
+ 2795935028U, // <1,7,4,0>: Cost 3 vsldoi12 <7,4,0,1>, <7,4,0,1>
+ 3728032975U, // <1,7,4,1>: Cost 4 vsldoi4 <6,1,7,4>, <1,6,1,7>
+ 3839153480U, // <1,7,4,2>: Cost 4 vsldoi12 <2,3,0,1>, <7,4,2,3>
+ 3397358074U, // <1,7,4,3>: Cost 4 vmrglw <7,0,1,4>, <6,2,7,3>
+ 3854783835U, // <1,7,4,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,4,4,4>
+ 2694442294U, // <1,7,4,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 3786100058U, // <1,7,4,6>: Cost 4 vsldoi8 <4,6,1,7>, <4,6,1,7>
+ 3722065254U, // <1,7,4,7>: Cost 4 vsldoi4 <5,1,7,4>, <7,4,5,6>
+ 2694442537U, // <1,7,4,u>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654298214U, // <1,7,5,0>: Cost 3 vsldoi4 <6,1,7,5>, LHS
+ 3854783893U, // <1,7,5,1>: Cost 4 vsldoi12 <4,u,5,1>, <7,5,1,u>
+ 3710126010U, // <1,7,5,2>: Cost 4 vsldoi4 <3,1,7,5>, <2,6,3,7>
+ 2332250618U, // <1,7,5,3>: Cost 3 vmrglw <u,4,1,5>, <6,2,7,3>
+ 2654301494U, // <1,7,5,4>: Cost 3 vsldoi4 <6,1,7,5>, RHS
+ 2284474795U, // <1,7,5,5>: Cost 3 vmrglw <0,4,1,5>, <6,1,7,5>
+ 2718330931U, // <1,7,5,6>: Cost 3 vsldoi8 <5,6,1,7>, <5,6,1,7>
+ 2332250946U, // <1,7,5,7>: Cost 3 vmrglw <u,4,1,5>, <6,6,7,7>
+ 2719658197U, // <1,7,5,u>: Cost 3 vsldoi8 <5,u,1,7>, <5,u,1,7>
+ 2332921954U, // <1,7,6,0>: Cost 3 vmrglw <u,5,1,6>, <5,6,7,0>
+ 3768185254U, // <1,7,6,1>: Cost 4 vsldoi8 <1,6,1,7>, <6,1,7,0>
+ 3710134202U, // <1,7,6,2>: Cost 4 vsldoi4 <3,1,7,6>, <2,6,3,7>
+ 3710134561U, // <1,7,6,3>: Cost 4 vsldoi4 <3,1,7,6>, <3,1,7,6>
+ 3710135606U, // <1,7,6,4>: Cost 4 vsldoi4 <3,1,7,6>, RHS
+ 3864884745U, // <1,7,6,5>: Cost 4 vsldoi12 <6,5,7,1>, <7,6,5,7>
+ 3854784017U, // <1,7,6,6>: Cost 4 vsldoi12 <4,u,5,1>, <7,6,6,6>
+ 2791953940U, // <1,7,6,7>: Cost 3 vsldoi12 <6,7,0,1>, <7,6,7,0>
+ 2792617501U, // <1,7,6,u>: Cost 3 vsldoi12 <6,u,0,1>, <7,6,u,0>
+ 2797925927U, // <1,7,7,0>: Cost 3 vsldoi12 <7,7,0,1>, <7,7,0,1>
+ 3365528426U, // <1,7,7,1>: Cost 4 vmrglw <1,6,1,7>, <1,1,7,1>
+ 3728058022U, // <1,7,7,2>: Cost 4 vsldoi4 <6,1,7,7>, <2,3,0,1>
+ 3365528509U, // <1,7,7,3>: Cost 4 vmrglw <1,6,1,7>, <1,2,7,3>
+ 3854784079U, // <1,7,7,4>: Cost 4 vsldoi12 <4,u,5,1>, <7,7,4,5>
+ 3722088148U, // <1,7,7,5>: Cost 4 vsldoi4 <5,1,7,7>, <5,1,7,7>
+ 3728060845U, // <1,7,7,6>: Cost 4 vsldoi4 <6,1,7,7>, <6,1,7,7>
+ 2781042284U, // <1,7,7,7>: Cost 3 vsldoi12 <4,u,5,1>, <7,7,7,7>
+ 2798515823U, // <1,7,7,u>: Cost 3 vsldoi12 <7,7,u,1>, <7,7,u,1>
+ 2654322705U, // <1,7,u,0>: Cost 3 vsldoi4 <6,1,7,u>, <0,0,1,u>
+ 2694444846U, // <1,7,u,1>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 2299711912U, // <1,7,u,2>: Cost 3 vmrglw <3,0,1,2>, <6,1,7,2>
+ 2323649018U, // <1,7,u,3>: Cost 3 vmrglw <7,0,1,u>, <6,2,7,3>
+ 2654326070U, // <1,7,u,4>: Cost 3 vsldoi4 <6,1,7,u>, RHS
+ 2694445210U, // <1,7,u,5>: Cost 3 vsldoi8 <1,6,1,7>, RHS
+ 2654327214U, // <1,7,u,6>: Cost 3 vsldoi4 <6,1,7,u>, <6,1,7,u>
+ 2323649346U, // <1,7,u,7>: Cost 3 vmrglw <7,0,1,u>, <6,6,7,7>
+ 2694445413U, // <1,7,u,u>: Cost 3 vsldoi8 <1,6,1,7>, LHS
+ 1610752017U, // <1,u,0,0>: Cost 2 vsldoi8 <0,0,1,u>, <0,0,1,u>
+ 1613406310U, // <1,u,0,1>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 2685821107U, // <1,u,0,2>: Cost 3 vsldoi8 <0,2,1,u>, <0,2,1,u>
+ 2283765916U, // <1,u,0,3>: Cost 3 vmrglw <0,3,1,0>, LHS
+ 1613406549U, // <1,u,0,4>: Cost 2 vsldoi8 <0,4,1,u>, <0,4,1,u>
+ 1725880054U, // <1,u,0,5>: Cost 2 vsldoi12 <u,0,5,1>, <u,0,5,1>
+ 2688475639U, // <1,u,0,6>: Cost 3 vsldoi8 <0,6,1,u>, <0,6,1,u>
+ 2283769160U, // <1,u,0,7>: Cost 3 vmrglw <0,3,1,0>, RHS
+ 1613406877U, // <1,u,0,u>: Cost 2 vsldoi8 <0,4,1,u>, LHS
+ 1550221414U, // <1,u,1,0>: Cost 2 vsldoi4 <1,1,1,1>, LHS
+ 269271142U, // <1,u,1,1>: Cost 1 vspltisw1 LHS
+ 1683117870U, // <1,u,1,2>: Cost 2 vsldoi12 <0,u,1,1>, LHS
+ 1213350044U, // <1,u,1,3>: Cost 2 vmrglw <0,u,1,1>, LHS
+ 1550224694U, // <1,u,1,4>: Cost 2 vsldoi4 <1,1,1,1>, RHS
+ 1147574426U, // <1,u,1,5>: Cost 2 vmrghw <1,1,1,1>, RHS
+ 2687149326U, // <1,u,1,6>: Cost 3 vsldoi8 <0,4,1,u>, <1,6,u,7>
+ 1213353288U, // <1,u,1,7>: Cost 2 vmrglw <0,u,1,1>, RHS
+ 269271142U, // <1,u,1,u>: Cost 1 vspltisw1 LHS
+ 2222118611U, // <1,u,2,0>: Cost 3 vmrghw <1,2,3,0>, <u,0,1,2>
+ 1148376878U, // <1,u,2,1>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 1148371862U, // <1,u,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 1225965724U, // <1,u,2,3>: Cost 2 vmrglw <3,0,1,2>, LHS
+ 2222118975U, // <1,u,2,4>: Cost 3 vmrghw <1,2,3,0>, <u,4,5,6>
+ 1148377242U, // <1,u,2,5>: Cost 2 vmrghw <1,2,3,0>, RHS
+ 2687150010U, // <1,u,2,6>: Cost 3 vsldoi8 <0,4,1,u>, <2,6,3,7>
+ 1225968968U, // <1,u,2,7>: Cost 2 vmrglw <3,0,1,2>, RHS
+ 1148377445U, // <1,u,2,u>: Cost 2 vmrghw <1,2,3,0>, LHS
+ 471040156U, // <1,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1544782644U, // <1,u,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1544783464U, // <1,u,3,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544784022U, // <1,u,3,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471043382U, // <1,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1592561668U, // <1,u,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1592562170U, // <1,u,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1592562682U, // <1,u,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 471045934U, // <1,u,3,u>: Cost 1 vsldoi4 LHS, LHS
+ 2708384629U, // <1,u,4,0>: Cost 3 vsldoi8 <4,0,1,u>, <4,0,1,u>
+ 2687151101U, // <1,u,4,1>: Cost 3 vsldoi8 <0,4,1,u>, <4,1,u,0>
+ 2223408022U, // <1,u,4,2>: Cost 3 vmrghw <1,4,2,5>, <1,2,3,0>
+ 2283798684U, // <1,u,4,3>: Cost 3 vmrglw <0,3,1,4>, LHS
+ 2642422785U, // <1,u,4,4>: Cost 3 vsldoi4 <4,1,u,4>, <4,1,u,4>
+ 1613409590U, // <1,u,4,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2283801090U, // <1,u,4,6>: Cost 3 vmrglw <0,3,1,4>, <3,4,5,6>
+ 2283801928U, // <1,u,4,7>: Cost 3 vmrglw <0,3,1,4>, RHS
+ 1613409833U, // <1,u,4,u>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 2284471235U, // <1,u,5,0>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,0>
+ 2284472046U, // <1,u,5,1>: Cost 3 vmrglw <0,4,1,5>, <2,3,u,1>
+ 2284472533U, // <1,u,5,2>: Cost 3 vmrglw <0,4,1,5>, <3,0,u,2>
+ 1210728604U, // <1,u,5,3>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2284471239U, // <1,u,5,4>: Cost 3 vmrglw <0,4,1,5>, <1,2,u,4>
+ 1210728786U, // <1,u,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 1683118234U, // <1,u,5,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210731848U, // <1,u,5,7>: Cost 2 vmrglw <0,4,1,5>, RHS
+ 1210728609U, // <1,u,5,u>: Cost 2 vmrglw <0,4,1,5>, LHS
+ 2720330023U, // <1,u,6,0>: Cost 3 vsldoi8 <6,0,1,u>, <6,0,1,u>
+ 2757376190U, // <1,u,6,1>: Cost 3 vsldoi12 <0,u,u,1>, <u,6,1,7>
+ 2726302202U, // <1,u,6,2>: Cost 3 vsldoi8 <7,0,1,u>, <6,2,7,3>
+ 2283151516U, // <1,u,6,3>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 2224972114U, // <1,u,6,4>: Cost 3 vmrghw <1,6,5,7>, <0,4,1,5>
+ 2224683162U, // <1,u,6,5>: Cost 3 vmrghw <1,6,1,7>, RHS
+ 2726302520U, // <1,u,6,6>: Cost 3 vsldoi8 <7,0,1,u>, <6,6,6,6>
+ 2283154760U, // <1,u,6,7>: Cost 3 vmrglw <0,2,1,6>, RHS
+ 2283151521U, // <1,u,6,u>: Cost 3 vmrglw <0,2,1,6>, LHS
+ 1652560896U, // <1,u,7,0>: Cost 2 vsldoi8 <7,0,1,u>, <7,0,1,u>
+ 2333590225U, // <1,u,7,1>: Cost 3 vmrglw <u,6,1,7>, <0,u,u,1>
+ 2765412628U, // <1,u,7,2>: Cost 3 vsldoi12 <2,3,0,1>, <u,7,2,3>
+ 2291785884U, // <1,u,7,3>: Cost 3 vmrglw <1,6,1,7>, LHS
+ 2781042984U, // <1,u,7,4>: Cost 3 vsldoi12 <4,u,5,1>, <u,7,4,5>
+ 3365527953U, // <1,u,7,5>: Cost 4 vmrglw <1,6,1,7>, <0,4,u,5>
+ 2791954748U, // <1,u,7,6>: Cost 3 vsldoi12 <6,7,0,1>, <u,7,6,7>
+ 2291789128U, // <1,u,7,7>: Cost 3 vmrglw <1,6,1,7>, RHS
+ 1657869960U, // <1,u,7,u>: Cost 2 vsldoi8 <7,u,1,u>, <7,u,1,u>
+ 471081121U, // <1,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 269271142U, // <1,u,u,1>: Cost 1 vspltisw1 LHS
+ 1544824424U, // <1,u,u,2>: Cost 2 vsldoi4 LHS, <2,2,2,2>
+ 1544824982U, // <1,u,u,3>: Cost 2 vsldoi4 LHS, <3,0,1,2>
+ 471084342U, // <1,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1613412506U, // <1,u,u,5>: Cost 2 vsldoi8 <0,4,1,u>, RHS
+ 1683118477U, // <1,u,u,6>: Cost 2 vsldoi12 <0,u,1,1>, RHS
+ 1210756424U, // <1,u,u,7>: Cost 2 vmrglw <0,4,1,u>, RHS
+ 471086894U, // <1,u,u,u>: Cost 1 vsldoi4 LHS, LHS
+ 2226757632U, // <2,0,0,0>: Cost 3 vmrghw <2,0,3,0>, <0,0,0,0>
+ 2226757734U, // <2,0,0,1>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 3826622483U, // <2,0,0,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,0,2,1>
+ 3843211292U, // <2,0,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,0,3,1>
+ 3300499794U, // <2,0,0,4>: Cost 4 vmrghw <2,0,3,0>, <0,4,1,5>
+ 3356256724U, // <2,0,0,5>: Cost 4 vmrglw <0,1,2,0>, <3,4,0,5>
+ 3825664056U, // <2,0,0,6>: Cost 4 vsldoi12 <0,0,6,2>, <0,0,6,2>
+ 3762889289U, // <2,0,0,7>: Cost 4 vsldoi8 <0,7,2,0>, <0,7,2,0>
+ 2226758301U, // <2,0,0,u>: Cost 3 vmrghw <2,0,3,0>, LHS
+ 2227429386U, // <2,0,1,0>: Cost 3 vmrghw <2,1,3,1>, <0,0,1,1>
+ 2227429478U, // <2,0,1,1>: Cost 3 vmrghw <2,1,3,1>, LHS
+ 1691156582U, // <2,0,1,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2666358997U, // <2,0,1,3>: Cost 3 vsldoi4 <u,2,0,1>, <3,0,u,2>
+ 2227462482U, // <2,0,1,4>: Cost 3 vmrghw <2,1,3,5>, <0,4,1,5>
+ 3722186464U, // <2,0,1,5>: Cost 4 vsldoi4 <5,2,0,1>, <5,2,0,1>
+ 3867099278U, // <2,0,1,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,1,6,7>
+ 3366881912U, // <2,0,1,7>: Cost 4 vmrglw <1,u,2,1>, <3,6,0,7>
+ 1691156636U, // <2,0,1,u>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2228027392U, // <2,0,2,0>: Cost 3 vmrghw <2,2,2,2>, <0,0,0,0>
+ 1154285670U, // <2,0,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 2228027565U, // <2,0,2,2>: Cost 3 vmrghw <2,2,2,2>, <0,2,1,2>
+ 3301769468U, // <2,0,2,3>: Cost 4 vmrghw <2,2,2,2>, <0,3,1,0>
+ 2228027730U, // <2,0,2,4>: Cost 3 vmrghw <2,2,2,2>, <0,4,1,5>
+ 3301769635U, // <2,0,2,5>: Cost 4 vmrghw <2,2,2,2>, <0,5,1,5>
+ 3780806586U, // <2,0,2,6>: Cost 4 vsldoi8 <3,7,2,0>, <2,6,3,7>
+ 3368880760U, // <2,0,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,0,7>
+ 1154286237U, // <2,0,2,u>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 1213440000U, // <2,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1213441702U, // <2,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2228535470U, // <2,0,3,2>: Cost 3 vmrghw <2,3,0,1>, <0,2,1,3>
+ 2636515632U, // <2,0,3,3>: Cost 3 vsldoi4 <3,2,0,3>, <3,2,0,3>
+ 2287182962U, // <2,0,3,4>: Cost 3 vmrglw LHS, <1,5,0,4>
+ 2660405346U, // <2,0,3,5>: Cost 3 vsldoi4 <7,2,0,3>, <5,6,7,0>
+ 2228535798U, // <2,0,3,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660406420U, // <2,0,3,7>: Cost 3 vsldoi4 <7,2,0,3>, <7,2,0,3>
+ 1213441709U, // <2,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3368894464U, // <2,0,4,0>: Cost 4 vmrglw <2,2,2,4>, <0,0,0,0>
+ 2764898642U, // <2,0,4,1>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,1,5>
+ 3826622811U, // <2,0,4,2>: Cost 4 vsldoi12 <0,2,1,2>, <0,4,2,5>
+ 3843211620U, // <2,0,4,3>: Cost 4 vsldoi12 <3,0,1,2>, <0,4,3,5>
+ 3838640493U, // <2,0,4,4>: Cost 4 vsldoi12 <2,2,2,2>, <0,4,4,5>
+ 2732944694U, // <2,0,4,5>: Cost 3 vsldoi8 <u,1,2,0>, RHS
+ 3797396857U, // <2,0,4,6>: Cost 4 vsldoi8 <6,5,2,0>, <4,6,5,2>
+ 3867099528U, // <2,0,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <0,4,7,5>
+ 2764898705U, // <2,0,4,u>: Cost 3 vsldoi12 <2,2,2,2>, <0,4,u,5>
+ 3364257792U, // <2,0,5,0>: Cost 4 vmrglw <1,4,2,5>, <0,0,0,0>
+ 2230124646U, // <2,0,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 3304235184U, // <2,0,5,2>: Cost 4 vmrghw <2,5,u,6>, <0,2,1,5>
+ 3364260144U, // <2,0,5,3>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,3>
+ 3303817554U, // <2,0,5,4>: Cost 4 vmrghw <2,5,3,0>, <0,4,1,5>
+ 3364260146U, // <2,0,5,5>: Cost 4 vmrglw <1,4,2,5>, <3,2,0,5>
+ 3867099602U, // <2,0,5,6>: Cost 4 vsldoi12 <7,0,1,2>, <0,5,6,7>
+ 3364260472U, // <2,0,5,7>: Cost 4 vmrglw <1,4,2,5>, <3,6,0,7>
+ 2230125213U, // <2,0,5,u>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2230796288U, // <2,0,6,0>: Cost 3 vmrghw <2,6,3,7>, <0,0,0,0>
+ 1157054566U, // <2,0,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 2230796465U, // <2,0,6,2>: Cost 3 vmrghw <2,6,3,7>, <0,2,1,6>
+ 3304538364U, // <2,0,6,3>: Cost 4 vmrghw <2,6,3,7>, <0,3,1,0>
+ 2230796626U, // <2,0,6,4>: Cost 3 vmrghw <2,6,3,7>, <0,4,1,5>
+ 3797398205U, // <2,0,6,5>: Cost 4 vsldoi8 <6,5,2,0>, <6,5,2,0>
+ 3304538614U, // <2,0,6,6>: Cost 4 vmrghw <2,6,3,7>, <0,6,1,7>
+ 3798725471U, // <2,0,6,7>: Cost 4 vsldoi8 <6,7,2,0>, <6,7,2,0>
+ 1157055133U, // <2,0,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 3371573248U, // <2,0,7,0>: Cost 4 vmrglw <2,6,2,7>, <0,0,0,0>
+ 2231189606U, // <2,0,7,1>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 3801380003U, // <2,0,7,2>: Cost 4 vsldoi8 <7,2,2,0>, <7,2,2,0>
+ 3802043636U, // <2,0,7,3>: Cost 4 vsldoi8 <7,3,2,0>, <7,3,2,0>
+ 3806688614U, // <2,0,7,4>: Cost 4 vsldoi8 <u,1,2,0>, <7,4,5,6>
+ 3356317308U, // <2,0,7,5>: Cost 4 vmrglw <0,1,2,7>, <7,u,0,5>
+ 3804034535U, // <2,0,7,6>: Cost 4 vsldoi8 <7,6,2,0>, <7,6,2,0>
+ 3806688876U, // <2,0,7,7>: Cost 4 vsldoi8 <u,1,2,0>, <7,7,7,7>
+ 2231190173U, // <2,0,7,u>: Cost 3 vmrghw <2,7,0,1>, LHS
+ 1208836096U, // <2,0,u,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1208837798U, // <2,0,u,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 1691157149U, // <2,0,u,2>: Cost 2 vsldoi12 <2,2,2,2>, LHS
+ 2636556597U, // <2,0,u,3>: Cost 3 vsldoi4 <3,2,0,u>, <3,2,0,u>
+ 2282579625U, // <2,0,u,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2660446306U, // <2,0,u,5>: Cost 3 vsldoi4 <7,2,0,u>, <5,6,7,0>
+ 2228535798U, // <2,0,u,6>: Cost 3 vmrghw <2,3,0,1>, <0,6,1,7>
+ 2660447385U, // <2,0,u,7>: Cost 3 vsldoi4 <7,2,0,u>, <7,2,0,u>
+ 1208837805U, // <2,0,u,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 3692388523U, // <2,1,0,0>: Cost 4 vsldoi4 <0,2,1,0>, <0,2,1,0>
+ 2757526244U, // <2,1,0,1>: Cost 3 vsldoi12 <1,0,1,2>, <1,0,1,2>
+ 2330290974U, // <2,1,0,2>: Cost 3 vmrglw <u,1,2,0>, <3,u,1,2>
+ 3843212020U, // <2,1,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <1,0,3,0>
+ 3692391734U, // <2,1,0,4>: Cost 4 vsldoi4 <0,2,1,0>, RHS
+ 3300533362U, // <2,1,0,5>: Cost 4 vmrghw <2,0,3,4>, <1,5,0,4>
+ 3794084337U, // <2,1,0,6>: Cost 4 vsldoi8 <6,0,2,1>, <0,6,1,2>
+ 3374170614U, // <2,1,0,7>: Cost 5 vmrglw <3,1,2,0>, <0,6,1,7>
+ 2758042403U, // <2,1,0,u>: Cost 3 vsldoi12 <1,0,u,2>, <1,0,u,2>
+ 2690482924U, // <2,1,1,0>: Cost 3 vsldoi8 <1,0,2,1>, <1,0,2,1>
+ 2764899124U, // <2,1,1,1>: Cost 3 vsldoi12 <2,2,2,2>, <1,1,1,1>
+ 2695791510U, // <2,1,1,2>: Cost 3 vsldoi8 <1,u,2,1>, <1,2,3,0>
+ 3362235271U, // <2,1,1,3>: Cost 4 vmrglw <1,1,2,1>, <1,2,1,3>
+ 3692399926U, // <2,1,1,4>: Cost 4 vsldoi4 <0,2,1,1>, RHS
+ 3832226649U, // <2,1,1,5>: Cost 4 vsldoi12 <1,1,5,2>, <1,1,5,2>
+ 3301205235U, // <2,1,1,6>: Cost 4 vmrghw <2,1,3,5>, <1,6,5,7>
+ 3768870179U, // <2,1,1,7>: Cost 4 vsldoi8 <1,7,2,1>, <1,7,2,1>
+ 2695791988U, // <2,1,1,u>: Cost 3 vsldoi8 <1,u,2,1>, <1,u,2,1>
+ 2618663085U, // <2,1,2,0>: Cost 3 vsldoi4 <0,2,1,2>, <0,2,1,2>
+ 2228028212U, // <2,1,2,1>: Cost 3 vmrghw <2,2,2,2>, <1,1,1,1>
+ 2618664552U, // <2,1,2,2>: Cost 3 vsldoi4 <0,2,1,2>, <2,2,2,2>
+ 2759000984U, // <2,1,2,3>: Cost 3 vsldoi12 <1,2,3,2>, <1,2,3,2>
+ 2618666294U, // <2,1,2,4>: Cost 3 vsldoi4 <0,2,1,2>, RHS
+ 2295136594U, // <2,1,2,5>: Cost 3 vmrglw <2,2,2,2>, <0,4,1,5>
+ 3769534376U, // <2,1,2,6>: Cost 4 vsldoi8 <1,u,2,1>, <2,6,1,7>
+ 2793358266U, // <2,1,2,7>: Cost 3 vsldoi12 <7,0,1,2>, <1,2,7,0>
+ 2618668846U, // <2,1,2,u>: Cost 3 vsldoi4 <0,2,1,2>, LHS
+ 2282536969U, // <2,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208795146U, // <2,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1213442198U, // <2,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2287181998U, // <2,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2618674486U, // <2,1,3,4>: Cost 3 vsldoi4 <0,2,1,3>, RHS
+ 1208795474U, // <2,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2287182001U, // <2,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287183055U, // <2,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208795153U, // <2,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 3692421295U, // <2,1,4,0>: Cost 4 vsldoi4 <0,2,1,4>, <0,2,1,4>
+ 3838641195U, // <2,1,4,1>: Cost 4 vsldoi12 <2,2,2,2>, <1,4,1,5>
+ 2330323742U, // <2,1,4,2>: Cost 3 vmrglw <u,1,2,4>, <3,u,1,2>
+ 3692423318U, // <2,1,4,3>: Cost 5 vsldoi4 <0,2,1,4>, <3,0,1,2>
+ 3692424502U, // <2,1,4,4>: Cost 4 vsldoi4 <0,2,1,4>, RHS
+ 2695793974U, // <2,1,4,5>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3799395705U, // <2,1,4,6>: Cost 4 vsldoi8 <6,u,2,1>, <4,6,5,2>
+ 3368895695U, // <2,1,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,1,7>
+ 2695794217U, // <2,1,4,u>: Cost 3 vsldoi8 <1,u,2,1>, RHS
+ 3692429488U, // <2,1,5,0>: Cost 4 vsldoi4 <0,2,1,5>, <0,2,1,5>
+ 3364257802U, // <2,1,5,1>: Cost 4 vmrglw <1,4,2,5>, <0,0,1,1>
+ 3692431253U, // <2,1,5,2>: Cost 4 vsldoi4 <0,2,1,5>, <2,5,u,6>
+ 3692431874U, // <2,1,5,3>: Cost 4 vsldoi4 <0,2,1,5>, <3,4,5,6>
+ 3692432694U, // <2,1,5,4>: Cost 4 vsldoi4 <0,2,1,5>, RHS
+ 3364258130U, // <2,1,5,5>: Cost 4 vmrglw <1,4,2,5>, <0,4,1,5>
+ 3303875827U, // <2,1,5,6>: Cost 4 vmrghw <2,5,3,7>, <1,6,5,7>
+ 3867100333U, // <2,1,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <1,5,7,0>
+ 3692435246U, // <2,1,5,u>: Cost 4 vsldoi4 <0,2,1,5>, LHS
+ 2618695857U, // <2,1,6,0>: Cost 3 vsldoi4 <0,2,1,6>, <0,2,1,6>
+ 2230797108U, // <2,1,6,1>: Cost 3 vmrghw <2,6,3,7>, <1,1,1,1>
+ 2618697658U, // <2,1,6,2>: Cost 3 vsldoi4 <0,2,1,6>, <2,6,3,7>
+ 3692439702U, // <2,1,6,3>: Cost 4 vsldoi4 <0,2,1,6>, <3,0,1,2>
+ 2618699062U, // <2,1,6,4>: Cost 3 vsldoi4 <0,2,1,6>, RHS
+ 3364929874U, // <2,1,6,5>: Cost 4 vmrglw <1,5,2,6>, <0,4,1,5>
+ 3692442424U, // <2,1,6,6>: Cost 4 vsldoi4 <0,2,1,6>, <6,6,6,6>
+ 3798733664U, // <2,1,6,7>: Cost 4 vsldoi8 <6,7,2,1>, <6,7,2,1>
+ 2618701614U, // <2,1,6,u>: Cost 3 vsldoi4 <0,2,1,6>, LHS
+ 3799397370U, // <2,1,7,0>: Cost 4 vsldoi8 <6,u,2,1>, <7,0,1,2>
+ 3371573258U, // <2,1,7,1>: Cost 4 vmrglw <2,6,2,7>, <0,0,1,1>
+ 2330351234U, // <2,1,7,2>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 3799397658U, // <2,1,7,3>: Cost 4 vsldoi8 <6,u,2,1>, <7,3,6,2>
+ 3799397734U, // <2,1,7,4>: Cost 4 vsldoi8 <6,u,2,1>, <7,4,5,6>
+ 3371573586U, // <2,1,7,5>: Cost 4 vmrglw <2,6,2,7>, <0,4,1,5>
+ 3799397870U, // <2,1,7,6>: Cost 4 vsldoi8 <6,u,2,1>, <7,6,2,7>
+ 3799397956U, // <2,1,7,7>: Cost 4 vsldoi8 <6,u,2,1>, <7,7,3,3>
+ 2330351234U, // <2,1,7,u>: Cost 3 vmrglw <u,1,2,7>, <7,u,1,2>
+ 2282577929U, // <2,1,u,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1208836106U, // <2,1,u,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1208838294U, // <2,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282578094U, // <2,1,u,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282577933U, // <2,1,u,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1208836434U, // <2,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282578097U, // <2,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287224015U, // <2,1,u,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1208836113U, // <2,1,u,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 2226759117U, // <2,2,0,0>: Cost 3 vmrghw <2,0,3,0>, <2,0,3,0>
+ 1624047718U, // <2,2,0,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 2697789613U, // <2,2,0,2>: Cost 3 vsldoi8 <2,2,2,2>, <0,2,1,2>
+ 2226767526U, // <2,2,0,3>: Cost 3 vmrghw <2,0,3,1>, <2,3,0,1>
+ 2697789778U, // <2,2,0,4>: Cost 3 vsldoi8 <2,2,2,2>, <0,4,1,5>
+ 3300657000U, // <2,2,0,5>: Cost 4 vmrghw <2,0,5,1>, <2,5,3,6>
+ 2226988986U, // <2,2,0,6>: Cost 3 vmrghw <2,0,6,1>, <2,6,3,7>
+ 3734271139U, // <2,2,0,7>: Cost 4 vsldoi4 <7,2,2,0>, <7,2,2,0>
+ 1624048285U, // <2,2,0,u>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 3831268868U, // <2,2,1,0>: Cost 4 vsldoi12 <1,0,1,2>, <2,1,0,1>
+ 2293138804U, // <2,2,1,1>: Cost 3 vmrglw <1,u,2,1>, <1,u,2,1>
+ 2697790358U, // <2,2,1,2>: Cost 3 vsldoi8 <2,2,2,2>, <1,2,3,0>
+ 2293137510U, // <2,2,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 3771532331U, // <2,2,1,4>: Cost 4 vsldoi8 <2,2,2,2>, <1,4,1,5>
+ 3767551106U, // <2,2,1,5>: Cost 4 vsldoi8 <1,5,2,2>, <1,5,2,2>
+ 3301173178U, // <2,2,1,6>: Cost 4 vmrghw <2,1,3,1>, <2,6,3,7>
+ 3372853169U, // <2,2,1,7>: Cost 4 vmrglw <2,u,2,1>, <2,6,2,7>
+ 2293137515U, // <2,2,1,u>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 1556938854U, // <2,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2295137733U, // <2,2,2,1>: Cost 3 vmrglw <2,2,2,2>, <2,0,2,1>
+ 336380006U, // <2,2,2,2>: Cost 1 vspltisw2 LHS
+ 1221394534U, // <2,2,2,3>: Cost 2 vmrglw <2,2,2,2>, LHS
+ 1556942134U, // <2,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <2,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2228029370U, // <2,2,2,6>: Cost 3 vmrghw <2,2,2,2>, <2,6,3,7>
+ 2660545701U, // <2,2,2,7>: Cost 3 vsldoi4 <7,2,2,2>, <7,2,2,2>
+ 336380006U, // <2,2,2,u>: Cost 1 vspltisw2 LHS
+ 2697791638U, // <2,2,3,0>: Cost 3 vsldoi8 <2,2,2,2>, <3,0,1,2>
+ 2765489840U, // <2,2,3,1>: Cost 3 vsldoi12 <2,3,1,2>, <2,3,1,2>
+ 1213441640U, // <2,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135053414U, // <2,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 2697792002U, // <2,2,3,4>: Cost 3 vsldoi8 <2,2,2,2>, <3,4,5,6>
+ 2330313780U, // <2,2,3,5>: Cost 3 vmrglw LHS, <1,4,2,5>
+ 2287183549U, // <2,2,3,6>: Cost 3 vmrglw LHS, <2,3,2,6>
+ 2660553894U, // <2,2,3,7>: Cost 3 vsldoi4 <7,2,2,3>, <7,2,2,3>
+ 135053419U, // <2,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2630697062U, // <2,2,4,0>: Cost 3 vsldoi4 <2,2,2,4>, LHS
+ 3771534282U, // <2,2,4,1>: Cost 4 vsldoi8 <2,2,2,2>, <4,1,2,3>
+ 2764900109U, // <2,2,4,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,4,2,5>
+ 2295152742U, // <2,2,4,3>: Cost 3 vmrglw <2,2,2,4>, LHS
+ 2295154282U, // <2,2,4,4>: Cost 3 vmrglw <2,2,2,4>, <2,2,2,4>
+ 1624050998U, // <2,2,4,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 2229675962U, // <2,2,4,6>: Cost 3 vmrghw <2,4,6,5>, <2,6,3,7>
+ 3368896433U, // <2,2,4,7>: Cost 4 vmrglw <2,2,2,4>, <2,6,2,7>
+ 1624051241U, // <2,2,4,u>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 3771534920U, // <2,2,5,0>: Cost 4 vsldoi8 <2,2,2,2>, <5,0,1,2>
+ 3364258540U, // <2,2,5,1>: Cost 4 vmrglw <1,4,2,5>, <1,0,2,1>
+ 2296489576U, // <2,2,5,2>: Cost 3 vmrglw <2,4,2,5>, <2,2,2,2>
+ 2290516070U, // <2,2,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 3771535284U, // <2,2,5,4>: Cost 4 vsldoi8 <2,2,2,2>, <5,4,5,6>
+ 2290517044U, // <2,2,5,5>: Cost 3 vmrglw <1,4,2,5>, <1,4,2,5>
+ 2697793634U, // <2,2,5,6>: Cost 3 vsldoi8 <2,2,2,2>, <5,6,7,0>
+ 3370231729U, // <2,2,5,7>: Cost 4 vmrglw <2,4,2,5>, <2,6,2,7>
+ 2290516075U, // <2,2,5,u>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2230797801U, // <2,2,6,0>: Cost 3 vmrghw <2,6,3,7>, <2,0,6,1>
+ 3304539679U, // <2,2,6,1>: Cost 4 vmrghw <2,6,3,7>, <2,1,3,1>
+ 2764900273U, // <2,2,6,2>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,2,7>
+ 2764900282U, // <2,2,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <2,6,3,7>
+ 2230798129U, // <2,2,6,4>: Cost 3 vmrghw <2,6,3,7>, <2,4,6,5>
+ 3304540008U, // <2,2,6,5>: Cost 4 vmrghw <2,6,3,7>, <2,5,3,6>
+ 1157056442U, // <2,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725000033U, // <2,2,6,7>: Cost 3 vsldoi8 <6,7,2,2>, <6,7,2,2>
+ 1157056442U, // <2,2,6,u>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2793359338U, // <2,2,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <2,7,0,1>
+ 3371574725U, // <2,2,7,1>: Cost 4 vmrglw <2,6,2,7>, <2,0,2,1>
+ 2297833064U, // <2,2,7,2>: Cost 3 vmrglw <2,6,2,7>, <2,2,2,2>
+ 2297831526U, // <2,2,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 2697794918U, // <2,2,7,4>: Cost 3 vsldoi8 <2,2,2,2>, <7,4,5,6>
+ 3371575053U, // <2,2,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,2,5>
+ 3304933297U, // <2,2,7,6>: Cost 4 vmrghw <2,7,0,1>, <2,6,2,7>
+ 2297833393U, // <2,2,7,7>: Cost 3 vmrglw <2,6,2,7>, <2,6,2,7>
+ 2297831531U, // <2,2,7,u>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1556938854U, // <2,2,u,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1624053550U, // <2,2,u,1>: Cost 2 vsldoi8 <2,2,2,2>, LHS
+ 336380006U, // <2,2,u,2>: Cost 1 vspltisw2 LHS
+ 135094374U, // <2,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 1556942134U, // <2,2,u,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1624053914U, // <2,2,u,5>: Cost 2 vsldoi8 <2,2,2,2>, RHS
+ 1157056442U, // <2,2,u,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2660594859U, // <2,2,u,7>: Cost 3 vsldoi4 <7,2,2,u>, <7,2,2,u>
+ 135094379U, // <2,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611448320U, // <2,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537706598U, // <2,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2689835181U, // <2,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2689835260U, // <2,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611448658U, // <2,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2732966354U, // <2,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2732966390U, // <2,3,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660603052U, // <2,3,0,7>: Cost 3 vsldoi4 <7,2,3,0>, <7,2,3,0>
+ 537707165U, // <2,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689835748U, // <2,3,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611449140U, // <2,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611449238U, // <2,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 3763577805U, // <2,3,1,3>: Cost 4 vsldoi8 LHS, <1,3,0,1>
+ 2689836112U, // <2,3,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689836143U, // <2,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689836239U, // <2,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 3366881210U, // <2,3,1,7>: Cost 4 vmrglw <1,u,2,1>, <2,6,3,7>
+ 1616094588U, // <2,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2689836493U, // <2,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,3,0>
+ 2685191711U, // <2,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611449960U, // <2,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611450022U, // <2,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2689836822U, // <2,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,3,5>
+ 2689836904U, // <2,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,3,6>
+ 1611450298U, // <2,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2295138234U, // <2,3,2,7>: Cost 3 vmrglw <2,2,2,2>, <2,6,3,7>
+ 1611450456U, // <2,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,3,3>
+ 1213440918U, // <2,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282538527U, // <2,3,3,1>: Cost 3 vmrglw LHS, <2,1,3,1>
+ 1557022322U, // <2,3,3,2>: Cost 2 vsldoi4 <2,2,3,3>, <2,2,3,3>
+ 1208796786U, // <2,3,3,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1213440922U, // <2,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282538531U, // <2,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2287188094U, // <2,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1213441978U, // <2,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 1208796791U, // <2,3,3,u>: Cost 2 vmrglw LHS, <2,2,3,u>
+ 1551056998U, // <2,3,4,0>: Cost 2 vsldoi4 <1,2,3,4>, LHS
+ 1551057818U, // <2,3,4,1>: Cost 2 vsldoi4 <1,2,3,4>, <1,2,3,4>
+ 2624800360U, // <2,3,4,2>: Cost 3 vsldoi4 <1,2,3,4>, <2,2,2,2>
+ 2624800918U, // <2,3,4,3>: Cost 3 vsldoi4 <1,2,3,4>, <3,0,1,2>
+ 1551060278U, // <2,3,4,4>: Cost 2 vsldoi4 <1,2,3,4>, RHS
+ 537709878U, // <2,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2732969337U, // <2,3,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2660635824U, // <2,3,4,7>: Cost 3 vsldoi4 <7,2,3,4>, <7,2,3,4>
+ 537710121U, // <2,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689838664U, // <2,3,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2732969615U, // <2,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2732969707U, // <2,3,5,2>: Cost 3 vsldoi8 LHS, <5,2,1,3>
+ 3763580721U, // <2,3,5,3>: Cost 4 vsldoi8 LHS, <5,3,0,1>
+ 2689839028U, // <2,3,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659228164U, // <2,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659228258U, // <2,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 3364259770U, // <2,3,5,7>: Cost 4 vmrglw <1,4,2,5>, <2,6,3,7>
+ 1659228420U, // <2,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2230798486U, // <2,3,6,0>: Cost 3 vmrghw <2,6,3,7>, <3,0,1,2>
+ 2732970407U, // <2,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1659228666U, // <2,3,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2230798748U, // <2,3,6,3>: Cost 3 vmrghw <2,6,3,7>, <3,3,3,3>
+ 2230798850U, // <2,3,6,4>: Cost 3 vmrghw <2,6,3,7>, <3,4,5,6>
+ 2732970731U, // <2,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659228984U, // <2,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659229006U, // <2,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1659229087U, // <2,3,6,u>: Cost 2 vsldoi8 LHS, <6,u,0,1>
+ 1659229178U, // <2,3,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2726999125U, // <2,3,7,1>: Cost 3 vsldoi8 <7,1,2,3>, <7,1,2,3>
+ 2727662758U, // <2,3,7,2>: Cost 3 vsldoi8 <7,2,2,3>, <7,2,2,3>
+ 2732971235U, // <2,3,7,3>: Cost 3 vsldoi8 LHS, <7,3,0,1>
+ 1659229542U, // <2,3,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2732971446U, // <2,3,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2732971484U, // <2,3,7,6>: Cost 3 vsldoi8 LHS, <7,6,0,7>
+ 1659229804U, // <2,3,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659229826U, // <2,3,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1208837014U, // <2,3,u,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 537712430U, // <2,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1616099205U, // <2,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,0>
+ 1208837746U, // <2,3,u,3>: Cost 2 vmrglw LHS, <2,2,3,3>
+ 1208837018U, // <2,3,u,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 537712794U, // <2,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1616099536U, // <2,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1208838074U, // <2,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 537712997U, // <2,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 3771547648U, // <2,4,0,0>: Cost 4 vsldoi8 <2,2,2,4>, <0,0,0,0>
+ 2697805926U, // <2,4,0,1>: Cost 3 vsldoi8 <2,2,2,4>, LHS
+ 3770884269U, // <2,4,0,2>: Cost 4 vsldoi8 <2,1,2,4>, <0,2,1,2>
+ 3806716164U, // <2,4,0,3>: Cost 4 vsldoi8 <u,1,2,4>, <0,3,1,u>
+ 3771547986U, // <2,4,0,4>: Cost 4 vsldoi8 <2,2,2,4>, <0,4,1,5>
+ 2226761014U, // <2,4,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3853462427U, // <2,4,0,6>: Cost 4 vsldoi12 <4,6,5,2>, <4,0,6,1>
+ 3867102116U, // <2,4,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,0,7,1>
+ 2226761257U, // <2,4,0,u>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 3849186231U, // <2,4,1,0>: Cost 4 vsldoi12 <4,0,1,2>, <4,1,0,2>
+ 3301207010U, // <2,4,1,1>: Cost 4 vmrghw <2,1,3,5>, <4,1,5,0>
+ 3766240150U, // <2,4,1,2>: Cost 4 vsldoi8 <1,3,2,4>, <1,2,3,0>
+ 3766240226U, // <2,4,1,3>: Cost 4 vsldoi8 <1,3,2,4>, <1,3,2,4>
+ 3301207248U, // <2,4,1,4>: Cost 4 vmrghw <2,1,3,5>, <4,4,4,4>
+ 2227432758U, // <2,4,1,5>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 3758941400U, // <2,4,1,6>: Cost 4 vsldoi8 <0,1,2,4>, <1,6,2,7>
+ 3768894758U, // <2,4,1,7>: Cost 4 vsldoi8 <1,7,2,4>, <1,7,2,4>
+ 2227433001U, // <2,4,1,u>: Cost 3 vmrghw <2,1,3,1>, RHS
+ 2228030354U, // <2,4,2,0>: Cost 3 vmrghw <2,2,2,2>, <4,0,5,1>
+ 3770885657U, // <2,4,2,1>: Cost 4 vsldoi8 <2,1,2,4>, <2,1,2,4>
+ 2697807466U, // <2,4,2,2>: Cost 3 vsldoi8 <2,2,2,4>, <2,2,2,4>
+ 3368880468U, // <2,4,2,3>: Cost 4 vmrglw <2,2,2,2>, <3,2,4,3>
+ 2228030672U, // <2,4,2,4>: Cost 3 vmrghw <2,2,2,2>, <4,4,4,4>
+ 1154288950U, // <2,4,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 3771549617U, // <2,4,2,6>: Cost 4 vsldoi8 <2,2,2,4>, <2,6,2,7>
+ 3368880796U, // <2,4,2,7>: Cost 4 vmrglw <2,2,2,2>, <3,6,4,7>
+ 1154289193U, // <2,4,2,u>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 2636808294U, // <2,4,3,0>: Cost 3 vsldoi4 <3,2,4,3>, LHS
+ 2287181861U, // <2,4,3,1>: Cost 3 vmrglw LHS, <0,0,4,1>
+ 2228866102U, // <2,4,3,2>: Cost 3 vmrghw <2,3,4,5>, <4,2,5,3>
+ 2636810580U, // <2,4,3,3>: Cost 3 vsldoi4 <3,2,4,3>, <3,2,4,3>
+ 1256574160U, // <2,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1213441742U, // <2,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2228866430U, // <2,4,3,6>: Cost 3 vmrghw <2,3,4,5>, <4,6,5,7>
+ 2660701368U, // <2,4,3,7>: Cost 3 vsldoi4 <7,2,4,3>, <7,2,4,3>
+ 1213441745U, // <2,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3704586342U, // <2,4,4,0>: Cost 4 vsldoi4 <2,2,4,4>, LHS
+ 3782831051U, // <2,4,4,1>: Cost 4 vsldoi8 <4,1,2,4>, <4,1,2,4>
+ 3704587900U, // <2,4,4,2>: Cost 4 vsldoi4 <2,2,4,4>, <2,2,4,4>
+ 3368896123U, // <2,4,4,3>: Cost 4 vmrglw <2,2,2,4>, <2,2,4,3>
+ 2793360592U, // <2,4,4,4>: Cost 3 vsldoi12 <7,0,1,2>, <4,4,4,4>
+ 2697809206U, // <2,4,4,5>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 3303198078U, // <2,4,4,6>: Cost 4 vmrghw <2,4,3,5>, <4,6,5,7>
+ 3867102444U, // <2,4,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,4,7,5>
+ 2697809449U, // <2,4,4,u>: Cost 3 vsldoi8 <2,2,2,4>, RHS
+ 2630852710U, // <2,4,5,0>: Cost 3 vsldoi4 <2,2,4,5>, LHS
+ 2624881572U, // <2,4,5,1>: Cost 3 vsldoi4 <1,2,4,5>, <1,2,4,5>
+ 2630854269U, // <2,4,5,2>: Cost 3 vsldoi4 <2,2,4,5>, <2,2,4,5>
+ 2666686677U, // <2,4,5,3>: Cost 3 vsldoi4 <u,2,4,5>, <3,0,u,2>
+ 2630855990U, // <2,4,5,4>: Cost 3 vsldoi4 <2,2,4,5>, RHS
+ 2230127926U, // <2,4,5,5>: Cost 3 vmrghw <2,5,3,6>, RHS
+ 1691159862U, // <2,4,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 3867102520U, // <2,4,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,5,7,0>
+ 1691159880U, // <2,4,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230799250U, // <2,4,6,0>: Cost 3 vmrghw <2,6,3,7>, <4,0,5,1>
+ 3304541130U, // <2,4,6,1>: Cost 4 vmrghw <2,6,3,7>, <4,1,2,3>
+ 2230799417U, // <2,4,6,2>: Cost 3 vmrghw <2,6,3,7>, <4,2,5,6>
+ 3304541323U, // <2,4,6,3>: Cost 4 vmrghw <2,6,3,7>, <4,3,5,7>
+ 2230799568U, // <2,4,6,4>: Cost 3 vmrghw <2,6,3,7>, <4,4,4,4>
+ 1157057846U, // <2,4,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3304541566U, // <2,4,6,6>: Cost 4 vmrghw <2,6,3,7>, <4,6,5,7>
+ 3798758243U, // <2,4,6,7>: Cost 4 vsldoi8 <6,7,2,4>, <6,7,2,4>
+ 1157058089U, // <2,4,6,u>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 3806721018U, // <2,4,7,0>: Cost 4 vsldoi8 <u,1,2,4>, <7,0,1,2>
+ 3853831590U, // <2,4,7,1>: Cost 4 vsldoi12 <4,7,1,2>, <4,7,1,2>
+ 3801412775U, // <2,4,7,2>: Cost 4 vsldoi8 <7,2,2,4>, <7,2,2,4>
+ 3802076408U, // <2,4,7,3>: Cost 4 vsldoi8 <7,3,2,4>, <7,3,2,4>
+ 3401436368U, // <2,4,7,4>: Cost 4 vmrglw <7,6,2,7>, <4,4,4,4>
+ 2793360840U, // <2,4,7,5>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,5,0>
+ 3804067307U, // <2,4,7,6>: Cost 4 vsldoi8 <7,6,2,4>, <7,6,2,4>
+ 3867102682U, // <2,4,7,7>: Cost 4 vsldoi12 <7,0,1,2>, <4,7,7,0>
+ 2793360867U, // <2,4,7,u>: Cost 3 vsldoi12 <7,0,1,2>, <4,7,u,0>
+ 2630877286U, // <2,4,u,0>: Cost 3 vsldoi4 <2,2,4,u>, LHS
+ 2282580144U, // <2,4,u,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2630878848U, // <2,4,u,2>: Cost 3 vsldoi4 <2,2,4,u>, <2,2,4,u>
+ 2636851545U, // <2,4,u,3>: Cost 3 vsldoi4 <3,2,4,u>, <3,2,4,u>
+ 1256615120U, // <2,4,u,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1208837838U, // <2,4,u,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 1691160105U, // <2,4,u,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2660742333U, // <2,4,u,7>: Cost 3 vsldoi4 <7,2,4,u>, <7,2,4,u>
+ 1208837841U, // <2,4,u,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 3766910976U, // <2,5,0,0>: Cost 4 vsldoi8 <1,4,2,5>, <0,0,0,0>
+ 2693169254U, // <2,5,0,1>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3760939181U, // <2,5,0,2>: Cost 4 vsldoi8 <0,4,2,5>, <0,2,1,2>
+ 3843214936U, // <2,5,0,3>: Cost 4 vsldoi12 <3,0,1,2>, <5,0,3,0>
+ 3760939355U, // <2,5,0,4>: Cost 4 vsldoi8 <0,4,2,5>, <0,4,2,5>
+ 3867102827U, // <2,5,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,5,1>
+ 3867102836U, // <2,5,0,6>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,6,1>
+ 3867102844U, // <2,5,0,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,0,7,0>
+ 2693169821U, // <2,5,0,u>: Cost 3 vsldoi8 <1,4,2,5>, LHS
+ 3766911724U, // <2,5,1,0>: Cost 4 vsldoi8 <1,4,2,5>, <1,0,2,1>
+ 3766911796U, // <2,5,1,1>: Cost 4 vsldoi8 <1,4,2,5>, <1,1,1,1>
+ 2693170070U, // <2,5,1,2>: Cost 3 vsldoi8 <1,4,2,5>, <1,2,3,0>
+ 3384798262U, // <2,5,1,3>: Cost 4 vmrglw <4,u,2,1>, <4,2,5,3>
+ 2693170228U, // <2,5,1,4>: Cost 3 vsldoi8 <1,4,2,5>, <1,4,2,5>
+ 3301208068U, // <2,5,1,5>: Cost 4 vmrghw <2,1,3,5>, <5,5,5,5>
+ 3366879607U, // <2,5,1,6>: Cost 4 vmrglw <1,u,2,1>, <0,4,5,6>
+ 3867102925U, // <2,5,1,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,1,7,0>
+ 2695824760U, // <2,5,1,u>: Cost 3 vsldoi8 <1,u,2,5>, <1,u,2,5>
+ 2642845798U, // <2,5,2,0>: Cost 3 vsldoi4 <4,2,5,2>, LHS
+ 2295139218U, // <2,5,2,1>: Cost 3 vmrglw <2,2,2,2>, <4,0,5,1>
+ 2699142760U, // <2,5,2,2>: Cost 3 vsldoi8 <2,4,2,5>, <2,2,2,2>
+ 3766912678U, // <2,5,2,3>: Cost 4 vsldoi8 <1,4,2,5>, <2,3,0,1>
+ 2699142925U, // <2,5,2,4>: Cost 3 vsldoi8 <2,4,2,5>, <2,4,2,5>
+ 2228031492U, // <2,5,2,5>: Cost 3 vmrghw <2,2,2,2>, <5,5,5,5>
+ 2295138818U, // <2,5,2,6>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,6>
+ 3368879347U, // <2,5,2,7>: Cost 4 vmrglw <2,2,2,2>, <1,6,5,7>
+ 2295138820U, // <2,5,2,u>: Cost 3 vmrglw <2,2,2,2>, <3,4,5,u>
+ 2287184866U, // <2,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256573842U, // <2,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642855630U, // <2,5,3,2>: Cost 3 vsldoi4 <4,2,5,3>, <2,3,4,5>
+ 2287182763U, // <2,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287184870U, // <2,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256574170U, // <2,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1213442562U, // <2,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287183091U, // <2,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1213442564U, // <2,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3716604006U, // <2,5,4,0>: Cost 4 vsldoi4 <4,2,5,4>, LHS
+ 3716604822U, // <2,5,4,1>: Cost 4 vsldoi4 <4,2,5,4>, <1,2,3,0>
+ 3766914099U, // <2,5,4,2>: Cost 4 vsldoi8 <1,4,2,5>, <4,2,5,0>
+ 3368895403U, // <2,5,4,3>: Cost 5 vmrglw <2,2,2,4>, <1,2,5,3>
+ 3716607031U, // <2,5,4,4>: Cost 4 vsldoi4 <4,2,5,4>, <4,2,5,4>
+ 2693172534U, // <2,5,4,5>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3363588610U, // <2,5,4,6>: Cost 4 vmrglw <1,3,2,4>, <3,4,5,6>
+ 3368895731U, // <2,5,4,7>: Cost 5 vmrglw <2,2,2,4>, <1,6,5,7>
+ 2693172777U, // <2,5,4,u>: Cost 3 vsldoi8 <1,4,2,5>, RHS
+ 3704668262U, // <2,5,5,0>: Cost 4 vsldoi4 <2,2,5,5>, LHS
+ 3704669078U, // <2,5,5,1>: Cost 4 vsldoi4 <2,2,5,5>, <1,2,3,0>
+ 3704669830U, // <2,5,5,2>: Cost 4 vsldoi4 <2,2,5,5>, <2,2,5,5>
+ 3364259460U, // <2,5,5,3>: Cost 4 vmrglw <1,4,2,5>, <2,2,5,3>
+ 3704671542U, // <2,5,5,4>: Cost 4 vsldoi4 <2,2,5,5>, RHS
+ 2793361412U, // <2,5,5,5>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 3364258167U, // <2,5,5,6>: Cost 4 vmrglw <1,4,2,5>, <0,4,5,6>
+ 3867103249U, // <2,5,5,7>: Cost 4 vsldoi12 <7,0,1,2>, <5,5,7,0>
+ 2793361412U, // <2,5,5,u>: Cost 3 vsldoi12 <7,0,1,2>, <5,5,5,5>
+ 2642878566U, // <2,5,6,0>: Cost 3 vsldoi4 <4,2,5,6>, LHS
+ 3386166810U, // <2,5,6,1>: Cost 4 vmrglw <5,1,2,6>, <4,u,5,1>
+ 2723033594U, // <2,5,6,2>: Cost 3 vsldoi8 <6,4,2,5>, <6,2,7,3>
+ 3848523842U, // <2,5,6,3>: Cost 4 vsldoi12 <3,u,1,2>, <5,6,3,4>
+ 2723033713U, // <2,5,6,4>: Cost 3 vsldoi8 <6,4,2,5>, <6,4,2,5>
+ 2230800388U, // <2,5,6,5>: Cost 3 vmrghw <2,6,3,7>, <5,5,5,5>
+ 2230800482U, // <2,5,6,6>: Cost 3 vmrghw <2,6,3,7>, <5,6,7,0>
+ 2785841252U, // <2,5,6,7>: Cost 3 vsldoi12 <5,6,7,2>, <5,6,7,2>
+ 2785914989U, // <2,5,6,u>: Cost 3 vsldoi12 <5,6,u,2>, <5,6,u,2>
+ 3796775930U, // <2,5,7,0>: Cost 4 vsldoi8 <6,4,2,5>, <7,0,1,2>
+ 3800757335U, // <2,5,7,1>: Cost 4 vsldoi8 <7,1,2,5>, <7,1,2,5>
+ 3853463689U, // <2,5,7,2>: Cost 4 vsldoi12 <4,6,5,2>, <5,7,2,3>
+ 3796776218U, // <2,5,7,3>: Cost 4 vsldoi8 <6,4,2,5>, <7,3,6,2>
+ 3796776294U, // <2,5,7,4>: Cost 4 vsldoi8 <6,4,2,5>, <7,4,5,6>
+ 3803411867U, // <2,5,7,5>: Cost 4 vsldoi8 <7,5,2,5>, <7,5,2,5>
+ 3371575081U, // <2,5,7,6>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,6>
+ 3796776516U, // <2,5,7,7>: Cost 4 vsldoi8 <6,4,2,5>, <7,7,3,3>
+ 3371575083U, // <2,5,7,u>: Cost 4 vmrglw <2,6,2,7>, <2,4,5,u>
+ 2287225826U, // <2,5,u,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1256614802U, // <2,5,u,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2642896590U, // <2,5,u,2>: Cost 3 vsldoi4 <4,2,5,u>, <2,3,4,5>
+ 2287223723U, // <2,5,u,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287225830U, // <2,5,u,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1256615130U, // <2,5,u,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1208838658U, // <2,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287224051U, // <2,5,u,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1208838660U, // <2,5,u,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 3772227584U, // <2,6,0,0>: Cost 4 vsldoi8 <2,3,2,6>, <0,0,0,0>
+ 2698485862U, // <2,6,0,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3759620282U, // <2,6,0,2>: Cost 4 vsldoi8 <0,2,2,6>, <0,2,2,6>
+ 3710675299U, // <2,6,0,3>: Cost 4 vsldoi4 <3,2,6,0>, <3,2,6,0>
+ 3767583058U, // <2,6,0,4>: Cost 4 vsldoi8 <1,5,2,6>, <0,4,1,5>
+ 3378153265U, // <2,6,0,5>: Cost 5 vmrglw <3,7,2,0>, <2,4,6,5>
+ 3865186637U, // <2,6,0,6>: Cost 4 vsldoi12 <6,6,2,2>, <6,0,6,1>
+ 2330291510U, // <2,6,0,7>: Cost 3 vmrglw <u,1,2,0>, RHS
+ 2698486429U, // <2,6,0,u>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 3734569062U, // <2,6,1,0>: Cost 4 vsldoi4 <7,2,6,1>, LHS
+ 3764929346U, // <2,6,1,1>: Cost 4 vsldoi8 <1,1,2,6>, <1,1,2,6>
+ 3772228502U, // <2,6,1,2>: Cost 4 vsldoi8 <2,3,2,6>, <1,2,3,0>
+ 3734571158U, // <2,6,1,3>: Cost 4 vsldoi4 <7,2,6,1>, <3,0,1,2>
+ 3734572342U, // <2,6,1,4>: Cost 4 vsldoi4 <7,2,6,1>, RHS
+ 3767583878U, // <2,6,1,5>: Cost 4 vsldoi8 <1,5,2,6>, <1,5,2,6>
+ 3768247511U, // <2,6,1,6>: Cost 4 vsldoi8 <1,6,2,6>, <1,6,2,6>
+ 2293140790U, // <2,6,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 2293140791U, // <2,6,1,u>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 3704717414U, // <2,6,2,0>: Cost 4 vsldoi4 <2,2,6,2>, LHS
+ 3395424589U, // <2,6,2,1>: Cost 4 vmrglw <6,6,2,2>, <6,0,6,1>
+ 2228031993U, // <2,6,2,2>: Cost 3 vmrghw <2,2,2,2>, <6,2,7,2>
+ 2698487485U, // <2,6,2,3>: Cost 3 vsldoi8 <2,3,2,6>, <2,3,2,6>
+ 3704720694U, // <2,6,2,4>: Cost 4 vsldoi4 <2,2,6,2>, RHS
+ 3773556575U, // <2,6,2,5>: Cost 4 vsldoi8 <2,5,2,6>, <2,5,2,6>
+ 2698487738U, // <2,6,2,6>: Cost 3 vsldoi8 <2,3,2,6>, <2,6,3,7>
+ 1221397814U, // <2,6,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 1221397815U, // <2,6,2,u>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 2636955750U, // <2,6,3,0>: Cost 3 vsldoi4 <3,2,6,3>, LHS
+ 2330314217U, // <2,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2636957626U, // <2,6,3,2>: Cost 3 vsldoi4 <3,2,6,3>, <2,6,3,7>
+ 2287184230U, // <2,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636959030U, // <2,6,3,4>: Cost 3 vsldoi4 <3,2,6,3>, RHS
+ 2648903448U, // <2,6,3,5>: Cost 3 vsldoi4 <5,2,6,3>, <5,2,6,3>
+ 1256575800U, // <2,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135056694U, // <2,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135056695U, // <2,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 3710705766U, // <2,6,4,0>: Cost 4 vsldoi4 <3,2,6,4>, LHS
+ 3698762677U, // <2,6,4,1>: Cost 5 vsldoi4 <1,2,6,4>, <1,2,6,4>
+ 3710707389U, // <2,6,4,2>: Cost 4 vsldoi4 <3,2,6,4>, <2,3,2,6>
+ 3710708071U, // <2,6,4,3>: Cost 4 vsldoi4 <3,2,6,4>, <3,2,6,4>
+ 3710709046U, // <2,6,4,4>: Cost 4 vsldoi4 <3,2,6,4>, RHS
+ 2698489142U, // <2,6,4,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 3796782457U, // <2,6,4,6>: Cost 4 vsldoi8 <6,4,2,6>, <4,6,5,2>
+ 2295156022U, // <2,6,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 2295156023U, // <2,6,4,u>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 3303870753U, // <2,6,5,0>: Cost 4 vmrghw <2,5,3,6>, <6,0,1,2>
+ 3788820134U, // <2,6,5,1>: Cost 4 vsldoi8 <5,1,2,6>, <5,1,2,6>
+ 3779530520U, // <2,6,5,2>: Cost 4 vsldoi8 <3,5,2,6>, <5,2,6,3>
+ 3303871026U, // <2,6,5,3>: Cost 4 vmrghw <2,5,3,6>, <6,3,4,5>
+ 3303871117U, // <2,6,5,4>: Cost 4 vmrghw <2,5,3,6>, <6,4,5,6>
+ 3791474666U, // <2,6,5,5>: Cost 4 vsldoi8 <5,5,2,6>, <5,5,2,6>
+ 3792138299U, // <2,6,5,6>: Cost 4 vsldoi8 <5,6,2,6>, <5,6,2,6>
+ 2290519350U, // <2,6,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2290519351U, // <2,6,5,u>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 2631008358U, // <2,6,6,0>: Cost 3 vsldoi4 <2,2,6,6>, LHS
+ 3372893673U, // <2,6,6,1>: Cost 4 vmrglw <2,u,2,6>, <2,0,6,1>
+ 2791445264U, // <2,6,6,2>: Cost 3 vsldoi12 <6,6,2,2>, <6,6,2,2>
+ 2230800968U, // <2,6,6,3>: Cost 3 vmrghw <2,6,3,7>, <6,3,7,0>
+ 2631011638U, // <2,6,6,4>: Cost 3 vsldoi4 <2,2,6,6>, RHS
+ 3372894001U, // <2,6,6,5>: Cost 4 vmrglw <2,u,2,6>, <2,4,6,5>
+ 2793362232U, // <2,6,6,6>: Cost 3 vsldoi12 <7,0,1,2>, <6,6,6,6>
+ 2295835958U, // <2,6,6,7>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2295835959U, // <2,6,6,u>: Cost 3 vmrglw <2,3,2,6>, RHS
+ 2793362254U, // <2,6,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,0,1>
+ 2792035160U, // <2,6,7,1>: Cost 3 vsldoi12 <6,7,1,2>, <6,7,1,2>
+ 2792108897U, // <2,6,7,2>: Cost 3 vsldoi12 <6,7,2,2>, <6,7,2,2>
+ 2769474408U, // <2,6,7,3>: Cost 3 vsldoi12 <3,0,1,2>, <6,7,3,0>
+ 2793362294U, // <2,6,7,4>: Cost 3 vsldoi12 <7,0,1,2>, <6,7,4,5>
+ 3371575089U, // <2,6,7,5>: Cost 4 vmrglw <2,6,2,7>, <2,4,6,5>
+ 2792403845U, // <2,6,7,6>: Cost 3 vsldoi12 <6,7,6,2>, <6,7,6,2>
+ 2297834806U, // <2,6,7,7>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2297834807U, // <2,6,7,u>: Cost 3 vmrglw <2,6,2,7>, RHS
+ 2636996710U, // <2,6,u,0>: Cost 3 vsldoi4 <3,2,6,u>, LHS
+ 2698491694U, // <2,6,u,1>: Cost 3 vsldoi8 <2,3,2,6>, LHS
+ 2636998631U, // <2,6,u,2>: Cost 3 vsldoi4 <3,2,6,u>, <2,6,u,7>
+ 2282580326U, // <2,6,u,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 2636999990U, // <2,6,u,4>: Cost 3 vsldoi4 <3,2,6,u>, RHS
+ 2698492058U, // <2,6,u,5>: Cost 3 vsldoi8 <2,3,2,6>, RHS
+ 1256616760U, // <2,6,u,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135097654U, // <2,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135097655U, // <2,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 2666864742U, // <2,7,0,0>: Cost 3 vsldoi4 <u,2,7,0>, LHS
+ 1719620602U, // <2,7,0,1>: Cost 2 vsldoi12 <7,0,1,2>, <7,0,1,2>
+ 3768254637U, // <2,7,0,2>: Cost 4 vsldoi8 <1,6,2,7>, <0,2,1,2>
+ 3393417722U, // <2,7,0,3>: Cost 4 vmrglw <6,3,2,0>, <6,2,7,3>
+ 2666868022U, // <2,7,0,4>: Cost 3 vsldoi4 <u,2,7,0>, RHS
+ 3867104290U, // <2,7,0,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,0,5,6>
+ 3728667127U, // <2,7,0,6>: Cost 4 vsldoi4 <6,2,7,0>, <6,2,7,0>
+ 2666869817U, // <2,7,0,7>: Cost 3 vsldoi4 <u,2,7,0>, <7,0,u,2>
+ 1720136761U, // <2,7,0,u>: Cost 2 vsldoi12 <7,0,u,2>, <7,0,u,2>
+ 3728670822U, // <2,7,1,0>: Cost 4 vsldoi4 <6,2,7,1>, LHS
+ 3774227252U, // <2,7,1,1>: Cost 4 vsldoi8 <2,6,2,7>, <1,1,1,1>
+ 3774227350U, // <2,7,1,2>: Cost 4 vsldoi8 <2,6,2,7>, <1,2,3,0>
+ 2323001850U, // <2,7,1,3>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 3728674102U, // <2,7,1,4>: Cost 4 vsldoi4 <6,2,7,1>, RHS
+ 3774227567U, // <2,7,1,5>: Cost 5 vsldoi8 <2,6,2,7>, <1,5,0,1>
+ 2694513880U, // <2,7,1,6>: Cost 3 vsldoi8 <1,6,2,7>, <1,6,2,7>
+ 3396744002U, // <2,7,1,7>: Cost 4 vmrglw <6,u,2,1>, <6,6,7,7>
+ 2323001850U, // <2,7,1,u>: Cost 3 vmrglw <6,u,2,1>, <6,2,7,3>
+ 2654937190U, // <2,7,2,0>: Cost 3 vsldoi4 <6,2,7,2>, LHS
+ 3728679732U, // <2,7,2,1>: Cost 4 vsldoi4 <6,2,7,2>, <1,1,1,1>
+ 2700486248U, // <2,7,2,2>: Cost 3 vsldoi8 <2,6,2,7>, <2,2,2,2>
+ 2321682938U, // <2,7,2,3>: Cost 3 vmrglw <6,6,2,2>, <6,2,7,3>
+ 2654940470U, // <2,7,2,4>: Cost 3 vsldoi4 <6,2,7,2>, RHS
+ 3859584196U, // <2,7,2,5>: Cost 4 vsldoi12 <5,6,7,2>, <7,2,5,6>
+ 2700486577U, // <2,7,2,6>: Cost 3 vsldoi8 <2,6,2,7>, <2,6,2,7>
+ 2228033132U, // <2,7,2,7>: Cost 3 vmrghw <2,2,2,2>, <7,7,7,7>
+ 2701813843U, // <2,7,2,u>: Cost 3 vsldoi8 <2,u,2,7>, <2,u,2,7>
+ 1581203558U, // <2,7,3,0>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 2654946100U, // <2,7,3,1>: Cost 3 vsldoi4 <6,2,7,3>, <1,1,1,1>
+ 2637031354U, // <2,7,3,2>: Cost 3 vsldoi4 <3,2,7,3>, <2,6,3,7>
+ 1256575482U, // <2,7,3,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581206838U, // <2,7,3,4>: Cost 2 vsldoi4 <6,2,7,3>, RHS
+ 2654949380U, // <2,7,3,5>: Cost 3 vsldoi4 <6,2,7,3>, <5,5,5,5>
+ 1581208058U, // <2,7,3,6>: Cost 2 vsldoi4 <6,2,7,3>, <6,2,7,3>
+ 1256575810U, // <2,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581209390U, // <2,7,3,u>: Cost 2 vsldoi4 <6,2,7,3>, LHS
+ 3728695398U, // <2,7,4,0>: Cost 4 vsldoi4 <6,2,7,4>, LHS
+ 3869758782U, // <2,7,4,1>: Cost 4 vsldoi12 <7,4,1,2>, <7,4,1,2>
+ 3728696936U, // <2,7,4,2>: Cost 4 vsldoi4 <6,2,7,4>, <2,2,2,2>
+ 3393450490U, // <2,7,4,3>: Cost 4 vmrglw <6,3,2,4>, <6,2,7,3>
+ 3728698678U, // <2,7,4,4>: Cost 4 vsldoi4 <6,2,7,4>, RHS
+ 2700487990U, // <2,7,4,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3728699899U, // <2,7,4,6>: Cost 4 vsldoi4 <6,2,7,4>, <6,2,7,4>
+ 3867104626U, // <2,7,4,7>: Cost 4 vsldoi12 <7,0,1,2>, <7,4,7,0>
+ 2700488233U, // <2,7,4,u>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 3855160709U, // <2,7,5,0>: Cost 4 vsldoi12 <5,0,1,2>, <7,5,0,1>
+ 3728704406U, // <2,7,5,1>: Cost 4 vsldoi4 <6,2,7,5>, <1,2,3,0>
+ 3370233956U, // <2,7,5,2>: Cost 4 vmrglw <2,4,2,5>, <5,6,7,2>
+ 2320380410U, // <2,7,5,3>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 3728706870U, // <2,7,5,4>: Cost 4 vsldoi4 <6,2,7,5>, RHS
+ 3867104694U, // <2,7,5,5>: Cost 4 vsldoi12 <7,0,1,2>, <7,5,5,5>
+ 3792146492U, // <2,7,5,6>: Cost 4 vsldoi8 <5,6,2,7>, <5,6,2,7>
+ 3394122562U, // <2,7,5,7>: Cost 4 vmrglw <6,4,2,5>, <6,6,7,7>
+ 2320380410U, // <2,7,5,u>: Cost 3 vmrglw <6,4,2,5>, <6,2,7,3>
+ 2230801402U, // <2,7,6,0>: Cost 3 vmrghw <2,6,3,7>, <7,0,1,2>
+ 3768258984U, // <2,7,6,1>: Cost 4 vsldoi8 <1,6,2,7>, <6,1,7,2>
+ 2730349050U, // <2,7,6,2>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 3372894575U, // <2,7,6,3>: Cost 4 vmrglw <2,u,2,6>, <3,2,7,3>
+ 2230801766U, // <2,7,6,4>: Cost 3 vmrghw <2,6,3,7>, <7,4,5,6>
+ 3304543670U, // <2,7,6,5>: Cost 4 vmrghw <2,6,3,7>, <7,5,5,5>
+ 3728716285U, // <2,7,6,6>: Cost 4 vsldoi4 <6,2,7,6>, <6,2,7,6>
+ 2230802028U, // <2,7,6,7>: Cost 3 vmrghw <2,6,3,7>, <7,7,7,7>
+ 2730349050U, // <2,7,6,u>: Cost 3 vsldoi8 <7,6,2,7>, <6,2,7,3>
+ 2793362983U, // <2,7,7,0>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,0,1>
+ 3728721112U, // <2,7,7,1>: Cost 4 vsldoi4 <6,2,7,7>, <1,6,2,7>
+ 3371574933U, // <2,7,7,2>: Cost 4 vmrglw <2,6,2,7>, <2,2,7,2>
+ 2327695866U, // <2,7,7,3>: Cost 3 vmrglw <7,6,2,7>, <6,2,7,3>
+ 3728723254U, // <2,7,7,4>: Cost 4 vsldoi4 <6,2,7,7>, RHS
+ 3371574855U, // <2,7,7,5>: Cost 5 vmrglw <2,6,2,7>, <2,1,7,5>
+ 2730350062U, // <2,7,7,6>: Cost 3 vsldoi8 <7,6,2,7>, <7,6,2,7>
+ 2793363052U, // <2,7,7,7>: Cost 3 vsldoi12 <7,0,1,2>, <7,7,7,7>
+ 2798671471U, // <2,7,7,u>: Cost 3 vsldoi12 <7,u,1,2>, <7,7,u,1>
+ 1581244518U, // <2,7,u,0>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1724929666U, // <2,7,u,1>: Cost 2 vsldoi12 <7,u,1,2>, <7,u,1,2>
+ 2637072314U, // <2,7,u,2>: Cost 3 vsldoi4 <3,2,7,u>, <2,6,3,7>
+ 1256616442U, // <2,7,u,3>: Cost 2 vmrglw LHS, <6,2,7,3>
+ 1581247798U, // <2,7,u,4>: Cost 2 vsldoi4 <6,2,7,u>, RHS
+ 2700490906U, // <2,7,u,5>: Cost 3 vsldoi8 <2,6,2,7>, RHS
+ 1581249023U, // <2,7,u,6>: Cost 2 vsldoi4 <6,2,7,u>, <6,2,7,u>
+ 1256616770U, // <2,7,u,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1581250350U, // <2,7,u,u>: Cost 2 vsldoi4 <6,2,7,u>, LHS
+ 1611489280U, // <2,u,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 537747563U, // <2,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685231277U, // <2,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685231356U, // <2,u,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611489618U, // <2,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2226763930U, // <2,u,0,5>: Cost 3 vmrghw <2,0,3,0>, RHS
+ 2733007350U, // <2,u,0,6>: Cost 3 vsldoi8 LHS, <0,6,1,7>
+ 2660971737U, // <2,u,0,7>: Cost 3 vsldoi4 <7,2,u,0>, <7,2,u,0>
+ 537748125U, // <2,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 2689876708U, // <2,u,1,0>: Cost 3 vsldoi8 LHS, <1,0,1,2>
+ 1611490100U, // <2,u,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611490198U, // <2,u,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 2293137564U, // <2,u,1,3>: Cost 3 vmrglw <1,u,2,1>, LHS
+ 2689877072U, // <2,u,1,4>: Cost 3 vsldoi8 LHS, <1,4,5,6>
+ 2689877103U, // <2,u,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2689877199U, // <2,u,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2293140808U, // <2,u,1,7>: Cost 3 vmrglw <1,u,2,1>, RHS
+ 1616135548U, // <2,u,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 1556938854U, // <2,u,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 1154291502U, // <2,u,2,1>: Cost 2 vmrghw <2,2,2,2>, LHS
+ 336380006U, // <2,u,2,2>: Cost 1 vspltisw2 LHS
+ 1611490982U, // <2,u,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 1556942134U, // <2,u,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 1154291866U, // <2,u,2,5>: Cost 2 vmrghw <2,2,2,2>, RHS
+ 1611491258U, // <2,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1221397832U, // <2,u,2,7>: Cost 2 vmrglw <2,2,2,2>, RHS
+ 336380006U, // <2,u,2,u>: Cost 1 vspltisw2 LHS
+ 1611491478U, // <2,u,3,0>: Cost 2 vsldoi8 LHS, <3,0,1,2>
+ 1213440073U, // <2,u,3,1>: Cost 2 vmrglw LHS, <0,0,u,1>
+ 1213442261U, // <2,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135053468U, // <2,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 1611491842U, // <2,u,3,4>: Cost 2 vsldoi8 LHS, <3,4,5,6>
+ 1213440401U, // <2,u,3,5>: Cost 2 vmrglw LHS, <0,4,u,5>
+ 1213442589U, // <2,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135056712U, // <2,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135053473U, // <2,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1551425638U, // <2,u,4,0>: Cost 2 vsldoi4 <1,2,u,4>, LHS
+ 1551426503U, // <2,u,4,1>: Cost 2 vsldoi4 <1,2,u,4>, <1,2,u,4>
+ 2625169000U, // <2,u,4,2>: Cost 3 vsldoi4 <1,2,u,4>, <2,2,2,2>
+ 2625169558U, // <2,u,4,3>: Cost 3 vsldoi4 <1,2,u,4>, <3,0,1,2>
+ 1551428918U, // <2,u,4,4>: Cost 2 vsldoi4 <1,2,u,4>, RHS
+ 537750838U, // <2,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2733010297U, // <2,u,4,6>: Cost 3 vsldoi8 LHS, <4,6,5,2>
+ 2295156040U, // <2,u,4,7>: Cost 3 vmrglw <2,2,2,4>, RHS
+ 537751081U, // <2,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 2689879624U, // <2,u,5,0>: Cost 3 vsldoi8 LHS, <5,0,1,2>
+ 2230130478U, // <2,u,5,1>: Cost 3 vmrghw <2,5,3,6>, LHS
+ 2631149217U, // <2,u,5,2>: Cost 3 vsldoi4 <2,2,u,5>, <2,2,u,5>
+ 2290516124U, // <2,u,5,3>: Cost 3 vmrglw <1,4,2,5>, LHS
+ 2689879988U, // <2,u,5,4>: Cost 3 vsldoi8 LHS, <5,4,5,6>
+ 1659269124U, // <2,u,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1691162778U, // <2,u,5,6>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2290519368U, // <2,u,5,7>: Cost 3 vmrglw <1,4,2,5>, RHS
+ 1691162796U, // <2,u,5,u>: Cost 2 vsldoi12 <2,2,2,2>, RHS
+ 2230802131U, // <2,u,6,0>: Cost 3 vmrghw <2,6,3,7>, <u,0,1,2>
+ 1157060398U, // <2,u,6,1>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659269626U, // <2,u,6,2>: Cost 2 vsldoi8 LHS, <6,2,7,3>
+ 2764904656U, // <2,u,6,3>: Cost 3 vsldoi12 <2,2,2,2>, <u,6,3,7>
+ 2230802495U, // <2,u,6,4>: Cost 3 vmrghw <2,6,3,7>, <u,4,5,6>
+ 1157060762U, // <2,u,6,5>: Cost 2 vmrghw <2,6,3,7>, RHS
+ 1659269944U, // <2,u,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659269966U, // <2,u,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1157060965U, // <2,u,6,u>: Cost 2 vmrghw <2,6,3,7>, LHS
+ 1659270138U, // <2,u,7,0>: Cost 2 vsldoi8 LHS, <7,0,1,2>
+ 2727040090U, // <2,u,7,1>: Cost 3 vsldoi8 <7,1,2,u>, <7,1,2,u>
+ 2727703723U, // <2,u,7,2>: Cost 3 vsldoi8 <7,2,2,u>, <7,2,2,u>
+ 2297831580U, // <2,u,7,3>: Cost 3 vmrglw <2,6,2,7>, LHS
+ 1659270502U, // <2,u,7,4>: Cost 2 vsldoi8 LHS, <7,4,5,6>
+ 2733012406U, // <2,u,7,5>: Cost 3 vsldoi8 LHS, <7,5,5,5>
+ 2730358255U, // <2,u,7,6>: Cost 3 vsldoi8 <7,6,2,u>, <7,6,2,u>
+ 1659270764U, // <2,u,7,7>: Cost 2 vsldoi8 LHS, <7,7,7,7>
+ 1659270786U, // <2,u,7,u>: Cost 2 vsldoi8 LHS, <7,u,1,2>
+ 1213481923U, // <2,u,u,0>: Cost 2 vmrglw LHS, <1,2,u,0>
+ 537753390U, // <2,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 336380006U, // <2,u,u,2>: Cost 1 vspltisw2 LHS
+ 135094428U, // <2,u,u,3>: Cost 1 vmrglw LHS, LHS
+ 1213481927U, // <2,u,u,4>: Cost 2 vmrglw LHS, <1,2,u,4>
+ 537753754U, // <2,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1208838685U, // <2,u,u,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135097672U, // <2,u,u,7>: Cost 1 vmrglw LHS, RHS
+ 135094433U, // <2,u,u,u>: Cost 1 vmrglw LHS, LHS
+ 1678557184U, // <3,0,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1678557194U, // <3,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2631181989U, // <3,0,0,2>: Cost 3 vsldoi4 <2,3,0,0>, <2,3,0,0>
+ 2289223984U, // <3,0,0,3>: Cost 3 vmrglw <1,2,3,0>, <3,2,0,3>
+ 2756943909U, // <3,0,0,4>: Cost 3 vsldoi12 LHS, <0,0,4,1>
+ 3362965729U, // <3,0,0,5>: Cost 4 vmrglw <1,2,3,0>, <3,1,0,5>
+ 3362966054U, // <3,0,0,6>: Cost 4 vmrglw <1,2,3,0>, <3,5,0,6>
+ 2289224312U, // <3,0,0,7>: Cost 3 vmrglw <1,2,3,0>, <3,6,0,7>
+ 1683202121U, // <3,0,0,u>: Cost 2 vsldoi12 LHS, <0,0,u,1>
+ 1557446758U, // <3,0,1,0>: Cost 2 vsldoi4 <2,3,0,1>, LHS
+ 2752741467U, // <3,0,1,1>: Cost 3 vsldoi12 LHS, <0,1,1,1>
+ 604815462U, // <3,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2631190676U, // <3,0,1,3>: Cost 3 vsldoi4 <2,3,0,1>, <3,0,1,0>
+ 1557450038U, // <3,0,1,4>: Cost 2 vsldoi4 <2,3,0,1>, RHS
+ 2667024388U, // <3,0,1,5>: Cost 3 vsldoi4 <u,3,0,1>, <5,5,5,5>
+ 2800074894U, // <3,0,1,6>: Cost 3 vsldoi12 LHS, <0,1,6,7>
+ 2661053667U, // <3,0,1,7>: Cost 3 vsldoi4 <7,3,0,1>, <7,3,0,1>
+ 604815516U, // <3,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696521165U, // <3,0,2,0>: Cost 3 vsldoi8 <2,0,3,0>, <2,0,3,0>
+ 2752741549U, // <3,0,2,1>: Cost 3 vsldoi12 LHS, <0,2,1,2>
+ 2691876456U, // <3,0,2,2>: Cost 3 vsldoi8 <1,2,3,0>, <2,2,2,2>
+ 2691876518U, // <3,0,2,3>: Cost 3 vsldoi8 <1,2,3,0>, <2,3,0,1>
+ 3830685895U, // <3,0,2,4>: Cost 4 vsldoi12 LHS, <0,2,4,1>
+ 3765618536U, // <3,0,2,5>: Cost 4 vsldoi8 <1,2,3,0>, <2,5,3,6>
+ 2691876794U, // <3,0,2,6>: Cost 3 vsldoi8 <1,2,3,0>, <2,6,3,7>
+ 2701166596U, // <3,0,2,7>: Cost 3 vsldoi8 <2,7,3,0>, <2,7,3,0>
+ 2756944108U, // <3,0,2,u>: Cost 3 vsldoi12 LHS, <0,2,u,2>
+ 2691877014U, // <3,0,3,0>: Cost 3 vsldoi8 <1,2,3,0>, <3,0,1,2>
+ 1161003110U, // <3,0,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2691877168U, // <3,0,3,2>: Cost 3 vsldoi8 <1,2,3,0>, <3,2,0,3>
+ 2691877246U, // <3,0,3,3>: Cost 3 vsldoi8 <1,2,3,0>, <3,3,0,0>
+ 2691877378U, // <3,0,3,4>: Cost 3 vsldoi8 <1,2,3,0>, <3,4,5,6>
+ 3765619238U, // <3,0,3,5>: Cost 4 vsldoi8 <1,2,3,0>, <3,5,0,6>
+ 2691877496U, // <3,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 3368962680U, // <3,0,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,0,7>
+ 1161003677U, // <3,0,3,u>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2289254400U, // <3,0,4,0>: Cost 3 vmrglw <1,2,3,4>, <0,0,0,0>
+ 1678557522U, // <3,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2631214761U, // <3,0,4,2>: Cost 3 vsldoi4 <2,3,0,4>, <2,3,0,4>
+ 2235580672U, // <3,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 2756944237U, // <3,0,4,4>: Cost 3 vsldoi12 LHS, <0,4,4,5>
+ 1618136374U, // <3,0,4,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 3309322742U, // <3,0,4,6>: Cost 4 vmrghw <3,4,5,6>, <0,6,1,7>
+ 3362998904U, // <3,0,4,7>: Cost 4 vmrglw <1,2,3,4>, <3,6,0,7>
+ 1683202449U, // <3,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 3765620296U, // <3,0,5,0>: Cost 4 vsldoi8 <1,2,3,0>, <5,0,1,2>
+ 2752299427U, // <3,0,5,1>: Cost 3 vsldoi12 LHS, <0,5,1,5>
+ 3789508346U, // <3,0,5,2>: Cost 4 vsldoi8 <5,2,3,0>, <5,2,3,0>
+ 3403486842U, // <3,0,5,3>: Cost 4 vmrglw <u,0,3,5>, <7,u,0,3>
+ 3765620660U, // <3,0,5,4>: Cost 4 vsldoi8 <1,2,3,0>, <5,4,5,6>
+ 2733682692U, // <3,0,5,5>: Cost 3 vsldoi8 <u,2,3,0>, <5,5,5,5>
+ 2800075218U, // <3,0,5,6>: Cost 3 vsldoi12 LHS, <0,5,6,7>
+ 3873817044U, // <3,0,5,7>: Cost 4 vsldoi12 LHS, <0,5,7,0>
+ 2800075234U, // <3,0,5,u>: Cost 3 vsldoi12 LHS, <0,5,u,5>
+ 2752299501U, // <3,0,6,0>: Cost 3 vsldoi12 LHS, <0,6,0,7>
+ 2236547174U, // <3,0,6,1>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2733683194U, // <3,0,6,2>: Cost 3 vsldoi8 <u,2,3,0>, <6,2,7,3>
+ 3844473352U, // <3,0,6,3>: Cost 4 vsldoi12 <3,2,0,3>, <0,6,3,7>
+ 3310289234U, // <3,0,6,4>: Cost 4 vmrghw <3,6,0,7>, <0,4,1,5>
+ 3873817114U, // <3,0,6,5>: Cost 4 vsldoi12 LHS, <0,6,5,7>
+ 2733683512U, // <3,0,6,6>: Cost 3 vsldoi8 <u,2,3,0>, <6,6,6,6>
+ 2725057384U, // <3,0,6,7>: Cost 3 vsldoi8 <6,7,3,0>, <6,7,3,0>
+ 2236547741U, // <3,0,6,u>: Cost 3 vmrghw <3,6,0,7>, LHS
+ 2297905152U, // <3,0,7,0>: Cost 3 vmrglw <2,6,3,7>, <0,0,0,0>
+ 2297906854U, // <3,0,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,1>
+ 2727711916U, // <3,0,7,2>: Cost 3 vsldoi8 <7,2,3,0>, <7,2,3,0>
+ 3371649328U, // <3,0,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,0,3>
+ 2733684070U, // <3,0,7,4>: Cost 3 vsldoi8 <u,2,3,0>, <7,4,5,6>
+ 3734843490U, // <3,0,7,5>: Cost 4 vsldoi4 <7,3,0,7>, <5,6,7,0>
+ 3798799895U, // <3,0,7,6>: Cost 4 vsldoi8 <6,7,3,0>, <7,6,7,3>
+ 2733684332U, // <3,0,7,7>: Cost 3 vsldoi8 <u,2,3,0>, <7,7,7,7>
+ 2297906861U, // <3,0,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,0,u>
+ 1557504102U, // <3,0,u,0>: Cost 2 vsldoi4 <2,3,0,u>, LHS
+ 1678557842U, // <3,0,u,1>: Cost 2 vsldoi12 LHS, <0,u,1,1>
+ 604816029U, // <3,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2691880892U, // <3,0,u,3>: Cost 3 vsldoi8 <1,2,3,0>, <u,3,0,1>
+ 1557507382U, // <3,0,u,4>: Cost 2 vsldoi4 <2,3,0,u>, RHS
+ 1618139290U, // <3,0,u,5>: Cost 2 vsldoi8 <1,2,3,0>, RHS
+ 2691881168U, // <3,0,u,6>: Cost 3 vsldoi8 <1,2,3,0>, <u,6,3,7>
+ 2661111018U, // <3,0,u,7>: Cost 3 vsldoi4 <7,3,0,u>, <7,3,0,u>
+ 604816083U, // <3,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2619310332U, // <3,1,0,0>: Cost 3 vsldoi4 <0,3,1,0>, <0,3,1,0>
+ 2756944612U, // <3,1,0,1>: Cost 3 vsldoi12 LHS, <1,0,1,2>
+ 2289221724U, // <3,1,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,1,2>
+ 2619312278U, // <3,1,0,3>: Cost 3 vsldoi4 <0,3,1,0>, <3,0,1,2>
+ 2619313462U, // <3,1,0,4>: Cost 3 vsldoi4 <0,3,1,0>, RHS
+ 2289221970U, // <3,1,0,5>: Cost 3 vmrglw <1,2,3,0>, <0,4,1,5>
+ 2232599768U, // <3,1,0,6>: Cost 3 vmrghw <3,0,1,2>, <1,6,2,7>
+ 3362964687U, // <3,1,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,1,7>
+ 2619316014U, // <3,1,0,u>: Cost 3 vsldoi4 <0,3,1,0>, LHS
+ 2756944683U, // <3,1,1,0>: Cost 3 vsldoi12 LHS, <1,1,0,1>
+ 1678558004U, // <3,1,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2691883927U, // <3,1,1,2>: Cost 3 vsldoi8 <1,2,3,1>, <1,2,3,1>
+ 3826631496U, // <3,1,1,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,1,3,3>
+ 2756944723U, // <3,1,1,4>: Cost 3 vsldoi12 LHS, <1,1,4,5>
+ 2756944732U, // <3,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 3830686561U, // <3,1,1,6>: Cost 4 vsldoi12 LHS, <1,1,6,1>
+ 3734869228U, // <3,1,1,7>: Cost 4 vsldoi4 <7,3,1,1>, <7,3,1,1>
+ 1678558004U, // <3,1,1,u>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2696529358U, // <3,1,2,0>: Cost 3 vsldoi8 <2,0,3,1>, <2,0,3,1>
+ 2756944775U, // <3,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 2294548630U, // <3,1,2,2>: Cost 3 vmrglw <2,1,3,2>, <3,0,1,2>
+ 1678558102U, // <3,1,2,3>: Cost 2 vsldoi12 LHS, <1,2,3,0>
+ 2631273782U, // <3,1,2,4>: Cost 3 vsldoi4 <2,3,1,2>, RHS
+ 2756944811U, // <3,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 3830686644U, // <3,1,2,6>: Cost 4 vsldoi12 LHS, <1,2,6,3>
+ 2800075706U, // <3,1,2,7>: Cost 3 vsldoi12 LHS, <1,2,7,0>
+ 1679000515U, // <3,1,2,u>: Cost 2 vsldoi12 LHS, <1,2,u,0>
+ 2619334911U, // <3,1,3,0>: Cost 3 vsldoi4 <0,3,1,3>, <0,3,1,3>
+ 2295218186U, // <3,1,3,1>: Cost 3 vmrglw <2,2,3,3>, <0,0,1,1>
+ 2293229718U, // <3,1,3,2>: Cost 3 vmrglw <1,u,3,3>, <3,0,1,2>
+ 2619337116U, // <3,1,3,3>: Cost 3 vsldoi4 <0,3,1,3>, <3,3,3,3>
+ 2619338038U, // <3,1,3,4>: Cost 3 vsldoi4 <0,3,1,3>, RHS
+ 2295218514U, // <3,1,3,5>: Cost 3 vmrglw <2,2,3,3>, <0,4,1,5>
+ 3830686729U, // <3,1,3,6>: Cost 4 vsldoi12 LHS, <1,3,6,7>
+ 3368961231U, // <3,1,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,1,7>
+ 2619340590U, // <3,1,3,u>: Cost 3 vsldoi4 <0,3,1,3>, LHS
+ 2619343104U, // <3,1,4,0>: Cost 3 vsldoi4 <0,3,1,4>, <0,3,1,4>
+ 2289254410U, // <3,1,4,1>: Cost 3 vmrglw <1,2,3,4>, <0,0,1,1>
+ 2289256598U, // <3,1,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,1,2>
+ 2619345410U, // <3,1,4,3>: Cost 3 vsldoi4 <0,3,1,4>, <3,4,5,6>
+ 2619346230U, // <3,1,4,4>: Cost 3 vsldoi4 <0,3,1,4>, RHS
+ 2756944976U, // <3,1,4,5>: Cost 3 vsldoi12 LHS, <1,4,5,6>
+ 3362996401U, // <3,1,4,6>: Cost 4 vmrglw <1,2,3,4>, <0,2,1,6>
+ 3362997455U, // <3,1,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,1,7>
+ 2619348782U, // <3,1,4,u>: Cost 3 vsldoi4 <0,3,1,4>, LHS
+ 2756945007U, // <3,1,5,0>: Cost 3 vsldoi12 LHS, <1,5,0,1>
+ 3830686840U, // <3,1,5,1>: Cost 4 vsldoi12 LHS, <1,5,1,1>
+ 3358361750U, // <3,1,5,2>: Cost 4 vmrglw <0,4,3,5>, <3,0,1,2>
+ 3830686857U, // <3,1,5,3>: Cost 4 vsldoi12 LHS, <1,5,3,0>
+ 2756945047U, // <3,1,5,4>: Cost 3 vsldoi12 LHS, <1,5,4,5>
+ 2294571346U, // <3,1,5,5>: Cost 3 vmrglw <2,1,3,5>, <0,4,1,5>
+ 3806105698U, // <3,1,5,6>: Cost 4 vsldoi8 <u,0,3,1>, <5,6,7,0>
+ 3873817774U, // <3,1,5,7>: Cost 4 vsldoi12 LHS, <1,5,7,1>
+ 2756945079U, // <3,1,5,u>: Cost 3 vsldoi12 LHS, <1,5,u,1>
+ 3830686912U, // <3,1,6,0>: Cost 4 vsldoi12 LHS, <1,6,0,1>
+ 2756945103U, // <3,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2236547990U, // <3,1,6,2>: Cost 3 vmrghw <3,6,0,7>, <1,2,3,0>
+ 3826631905U, // <3,1,6,3>: Cost 4 vsldoi12 <0,2,1,3>, <1,6,3,7>
+ 3830686952U, // <3,1,6,4>: Cost 4 vsldoi12 LHS, <1,6,4,5>
+ 2756945139U, // <3,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 3830686972U, // <3,1,6,6>: Cost 4 vsldoi12 LHS, <1,6,6,7>
+ 2800076030U, // <3,1,6,7>: Cost 3 vsldoi12 LHS, <1,6,7,0>
+ 2756945166U, // <3,1,6,u>: Cost 3 vsldoi12 LHS, <1,6,u,7>
+ 3699081318U, // <3,1,7,0>: Cost 4 vsldoi4 <1,3,1,7>, LHS
+ 2297905162U, // <3,1,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,1>
+ 2297907350U, // <3,1,7,2>: Cost 3 vmrglw <2,6,3,7>, <3,0,1,2>
+ 3365675182U, // <3,1,7,3>: Cost 4 vmrglw <1,6,3,7>, <0,2,1,3>
+ 3699084598U, // <3,1,7,4>: Cost 4 vsldoi4 <1,3,1,7>, RHS
+ 2297905490U, // <3,1,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,1,5>
+ 2297905329U, // <3,1,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 3368330447U, // <3,1,7,7>: Cost 4 vmrglw <2,1,3,7>, <1,6,1,7>
+ 2297905169U, // <3,1,7,u>: Cost 3 vmrglw <2,6,3,7>, <0,0,1,u>
+ 2619375876U, // <3,1,u,0>: Cost 3 vsldoi4 <0,3,1,u>, <0,3,1,u>
+ 1678558004U, // <3,1,u,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 2289289366U, // <3,1,u,2>: Cost 3 vmrglw <1,2,3,u>, <3,0,1,2>
+ 1679000956U, // <3,1,u,3>: Cost 2 vsldoi12 LHS, <1,u,3,0>
+ 2619378998U, // <3,1,u,4>: Cost 3 vsldoi4 <0,3,1,u>, RHS
+ 2756945297U, // <3,1,u,5>: Cost 3 vsldoi12 LHS, <1,u,5,3>
+ 2297905329U, // <3,1,u,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,1,6>
+ 2800076192U, // <3,1,u,7>: Cost 3 vsldoi12 LHS, <1,u,7,0>
+ 1683203497U, // <3,1,u,u>: Cost 2 vsldoi12 LHS, <1,u,u,0>
+ 3362964203U, // <3,2,0,0>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,0>
+ 2289222380U, // <3,2,0,1>: Cost 3 vmrglw <1,2,3,0>, <1,0,2,1>
+ 2289222462U, // <3,2,0,2>: Cost 3 vmrglw <1,2,3,0>, <1,1,2,2>
+ 1215479910U, // <3,2,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3362964207U, // <3,2,0,4>: Cost 4 vmrglw <1,2,3,0>, <1,0,2,4>
+ 2289222708U, // <3,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2232600506U, // <3,2,0,6>: Cost 3 vmrghw <3,0,1,2>, <2,6,3,7>
+ 3396142296U, // <3,2,0,7>: Cost 4 vmrglw <6,7,3,0>, <1,6,2,7>
+ 1215479915U, // <3,2,0,u>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 3699105894U, // <3,2,1,0>: Cost 4 vsldoi4 <1,3,2,1>, LHS
+ 3765633844U, // <3,2,1,1>: Cost 4 vsldoi8 <1,2,3,2>, <1,1,1,1>
+ 2691892120U, // <3,2,1,2>: Cost 3 vsldoi8 <1,2,3,2>, <1,2,3,2>
+ 2752300575U, // <3,2,1,3>: Cost 3 vsldoi12 LHS, <2,1,3,1>
+ 3699109174U, // <3,2,1,4>: Cost 4 vsldoi4 <1,3,2,1>, RHS
+ 3830687280U, // <3,2,1,5>: Cost 5 vsldoi12 LHS, <2,1,5,0>
+ 3830687289U, // <3,2,1,6>: Cost 4 vsldoi12 LHS, <2,1,6,0>
+ 3874260548U, // <3,2,1,7>: Cost 4 vsldoi12 LHS, <2,1,7,2>
+ 2752742988U, // <3,2,1,u>: Cost 3 vsldoi12 LHS, <2,1,u,1>
+ 2631344230U, // <3,2,2,0>: Cost 3 vsldoi4 <2,3,2,2>, LHS
+ 2697201184U, // <3,2,2,1>: Cost 3 vsldoi8 <2,1,3,2>, <2,1,3,2>
+ 1678558824U, // <3,2,2,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678558834U, // <3,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 2631347510U, // <3,2,2,4>: Cost 3 vsldoi4 <2,3,2,2>, RHS
+ 3368953613U, // <3,2,2,5>: Cost 4 vmrglw <2,2,3,2>, <2,4,2,5>
+ 2234304442U, // <3,2,2,6>: Cost 3 vmrghw <3,2,6,3>, <2,6,3,7>
+ 3368953777U, // <3,2,2,7>: Cost 4 vmrglw <2,2,3,2>, <2,6,2,7>
+ 1679001247U, // <3,2,2,u>: Cost 2 vsldoi12 LHS, <2,2,u,3>
+ 1678558886U, // <3,2,3,0>: Cost 2 vsldoi12 LHS, <2,3,0,1>
+ 2752300719U, // <3,2,3,1>: Cost 3 vsldoi12 LHS, <2,3,1,1>
+ 2752300729U, // <3,2,3,2>: Cost 3 vsldoi12 LHS, <2,3,2,2>
+ 1221476454U, // <3,2,3,3>: Cost 2 vmrglw <2,2,3,3>, LHS
+ 1678558926U, // <3,2,3,4>: Cost 2 vsldoi12 LHS, <2,3,4,5>
+ 2800076503U, // <3,2,3,5>: Cost 3 vsldoi12 LHS, <2,3,5,5>
+ 2234746810U, // <3,2,3,6>: Cost 3 vmrghw <3,3,3,3>, <2,6,3,7>
+ 2800076516U, // <3,2,3,7>: Cost 3 vsldoi12 LHS, <2,3,7,0>
+ 1678558958U, // <3,2,3,u>: Cost 2 vsldoi12 LHS, <2,3,u,1>
+ 3699130470U, // <3,2,4,0>: Cost 4 vsldoi4 <1,3,2,4>, LHS
+ 3362996972U, // <3,2,4,1>: Cost 4 vmrglw <1,2,3,4>, <1,0,2,1>
+ 2289256040U, // <3,2,4,2>: Cost 3 vmrglw <1,2,3,4>, <2,2,2,2>
+ 1215512678U, // <3,2,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3362998676U, // <3,2,4,4>: Cost 4 vmrglw <1,2,3,4>, <3,3,2,4>
+ 2691894582U, // <3,2,4,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2235582394U, // <3,2,4,6>: Cost 3 vmrghw <3,4,5,6>, <2,6,3,7>
+ 3734967544U, // <3,2,4,7>: Cost 4 vsldoi4 <7,3,2,4>, <7,3,2,4>
+ 1215512683U, // <3,2,4,u>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 3705110630U, // <3,2,5,0>: Cost 4 vsldoi4 <2,3,2,5>, LHS
+ 3368313985U, // <3,2,5,1>: Cost 4 vmrglw <2,1,3,5>, <1,5,2,1>
+ 3368314472U, // <3,2,5,2>: Cost 4 vmrglw <2,1,3,5>, <2,2,2,2>
+ 2756945768U, // <3,2,5,3>: Cost 3 vsldoi12 LHS, <2,5,3,6>
+ 3705113910U, // <3,2,5,4>: Cost 4 vsldoi4 <2,3,2,5>, RHS
+ 3310061416U, // <3,2,5,5>: Cost 4 vmrghw <3,5,6,6>, <2,5,3,6>
+ 3310135226U, // <3,2,5,6>: Cost 4 vmrghw <3,5,7,6>, <2,6,3,7>
+ 3370305457U, // <3,2,5,7>: Cost 5 vmrglw <2,4,3,5>, <2,6,2,7>
+ 2752743317U, // <3,2,5,u>: Cost 3 vsldoi12 LHS, <2,5,u,6>
+ 2631376998U, // <3,2,6,0>: Cost 3 vsldoi4 <2,3,2,6>, LHS
+ 3705119540U, // <3,2,6,1>: Cost 4 vsldoi4 <2,3,2,6>, <1,1,1,1>
+ 2631378621U, // <3,2,6,2>: Cost 3 vsldoi4 <2,3,2,6>, <2,3,2,6>
+ 1678559162U, // <3,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2631380278U, // <3,2,6,4>: Cost 3 vsldoi4 <2,3,2,6>, RHS
+ 3370976956U, // <3,2,6,5>: Cost 4 vmrglw <2,5,3,6>, <2,3,2,5>
+ 2237065146U, // <3,2,6,6>: Cost 3 vmrghw <3,6,7,7>, <2,6,3,7>
+ 3798815594U, // <3,2,6,7>: Cost 4 vsldoi8 <6,7,3,2>, <6,7,3,2>
+ 1679001575U, // <3,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 2800076778U, // <3,2,7,0>: Cost 3 vsldoi12 LHS, <2,7,0,1>
+ 3371647724U, // <3,2,7,1>: Cost 4 vmrglw <2,6,3,7>, <1,0,2,1>
+ 2297906792U, // <3,2,7,2>: Cost 3 vmrglw <2,6,3,7>, <2,2,2,2>
+ 1224163430U, // <3,2,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 3705130294U, // <3,2,7,4>: Cost 4 vsldoi4 <2,3,2,7>, RHS
+ 3371648052U, // <3,2,7,5>: Cost 4 vmrglw <2,6,3,7>, <1,4,2,5>
+ 2297906877U, // <3,2,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,3,2,6>
+ 3371648702U, // <3,2,7,7>: Cost 4 vmrglw <2,6,3,7>, <2,3,2,7>
+ 1224163435U, // <3,2,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1679001659U, // <3,2,u,0>: Cost 2 vsldoi12 LHS, <2,u,0,1>
+ 2752743492U, // <3,2,u,1>: Cost 3 vsldoi12 LHS, <2,u,1,1>
+ 1678558824U, // <3,2,u,2>: Cost 2 vsldoi12 LHS, <2,2,2,2>
+ 1678559320U, // <3,2,u,3>: Cost 2 vsldoi12 LHS, <2,u,3,3>
+ 1679001699U, // <3,2,u,4>: Cost 2 vsldoi12 LHS, <2,u,4,5>
+ 2691897498U, // <3,2,u,5>: Cost 3 vsldoi8 <1,2,3,2>, RHS
+ 2237908922U, // <3,2,u,6>: Cost 3 vmrghw <3,u,1,2>, <2,6,3,7>
+ 2800519289U, // <3,2,u,7>: Cost 3 vsldoi12 LHS, <2,u,7,0>
+ 1679001731U, // <3,2,u,u>: Cost 2 vsldoi12 LHS, <2,u,u,1>
+ 1215480726U, // <3,3,0,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1678559382U, // <3,3,0,1>: Cost 2 vsldoi12 LHS, <3,0,1,2>
+ 2631403200U, // <3,3,0,2>: Cost 3 vsldoi4 <2,3,3,0>, <2,3,3,0>
+ 2289223282U, // <3,3,0,3>: Cost 3 vmrglw <1,2,3,0>, <2,2,3,3>
+ 2752301232U, // <3,3,0,4>: Cost 3 vsldoi12 LHS, <3,0,4,1>
+ 3362965027U, // <3,3,0,5>: Cost 4 vmrglw <1,2,3,0>, <2,1,3,5>
+ 3362965352U, // <3,3,0,6>: Cost 4 vmrglw <1,2,3,0>, <2,5,3,6>
+ 2289223610U, // <3,3,0,7>: Cost 3 vmrglw <1,2,3,0>, <2,6,3,7>
+ 1678559445U, // <3,3,0,u>: Cost 2 vsldoi12 LHS, <3,0,u,2>
+ 3830687964U, // <3,3,1,0>: Cost 4 vsldoi12 LHS, <3,1,0,0>
+ 2752301286U, // <3,3,1,1>: Cost 3 vsldoi12 LHS, <3,1,1,1>
+ 2752301297U, // <3,3,1,2>: Cost 3 vsldoi12 LHS, <3,1,2,3>
+ 2305157532U, // <3,3,1,3>: Cost 3 vmrglw <3,u,3,1>, <3,3,3,3>
+ 3830688000U, // <3,3,1,4>: Cost 4 vsldoi12 LHS, <3,1,4,0>
+ 3830688009U, // <3,3,1,5>: Cost 4 vsldoi12 LHS, <3,1,5,0>
+ 3830688019U, // <3,3,1,6>: Cost 4 vsldoi12 LHS, <3,1,6,1>
+ 3362973626U, // <3,3,1,7>: Cost 4 vmrglw <1,2,3,1>, <2,6,3,7>
+ 2752743719U, // <3,3,1,u>: Cost 3 vsldoi12 LHS, <3,1,u,3>
+ 2631417958U, // <3,3,2,0>: Cost 3 vsldoi4 <2,3,3,2>, LHS
+ 3826043193U, // <3,3,2,1>: Cost 4 vsldoi12 LHS, <3,2,1,3>
+ 1624131186U, // <3,3,2,2>: Cost 2 vsldoi8 <2,2,3,3>, <2,2,3,3>
+ 2752301384U, // <3,3,2,3>: Cost 3 vsldoi12 LHS, <3,2,3,0>
+ 2631421238U, // <3,3,2,4>: Cost 3 vsldoi4 <2,3,3,2>, RHS
+ 3826485602U, // <3,3,2,5>: Cost 4 vsldoi12 LHS, <3,2,5,u>
+ 2752301414U, // <3,3,2,6>: Cost 3 vsldoi12 LHS, <3,2,6,3>
+ 2771249519U, // <3,3,2,7>: Cost 3 vsldoi12 <3,2,7,3>, <3,2,7,3>
+ 1628112984U, // <3,3,2,u>: Cost 2 vsldoi8 <2,u,3,3>, <2,u,3,3>
+ 1563656294U, // <3,3,3,0>: Cost 2 vsldoi4 <3,3,3,3>, LHS
+ 2301855911U, // <3,3,3,1>: Cost 3 vmrglw <3,3,3,3>, <3,0,3,1>
+ 2697873730U, // <3,3,3,2>: Cost 3 vsldoi8 <2,2,3,3>, <3,2,2,3>
+ 403488870U, // <3,3,3,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,3,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 2301856239U, // <3,3,3,5>: Cost 3 vmrglw <3,3,3,3>, <3,4,3,5>
+ 2697874067U, // <3,3,3,6>: Cost 3 vsldoi8 <2,2,3,3>, <3,6,3,7>
+ 2295220154U, // <3,3,3,7>: Cost 3 vmrglw <2,2,3,3>, <2,6,3,7>
+ 403488870U, // <3,3,3,u>: Cost 1 vspltisw3 LHS
+ 2289255318U, // <3,3,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,3,0>
+ 2631435162U, // <3,3,4,1>: Cost 3 vsldoi4 <2,3,3,4>, <1,2,3,4>
+ 2631435972U, // <3,3,4,2>: Cost 3 vsldoi4 <2,3,3,4>, <2,3,3,4>
+ 2289256050U, // <3,3,4,3>: Cost 3 vmrglw <1,2,3,4>, <2,2,3,3>
+ 1215513498U, // <3,3,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679002114U, // <3,3,4,5>: Cost 2 vsldoi12 LHS, <3,4,5,6>
+ 3362998120U, // <3,3,4,6>: Cost 4 vmrglw <1,2,3,4>, <2,5,3,6>
+ 2289256378U, // <3,3,4,7>: Cost 3 vmrglw <1,2,3,4>, <2,6,3,7>
+ 1679002141U, // <3,3,4,u>: Cost 2 vsldoi12 LHS, <3,4,u,6>
+ 3831130657U, // <3,3,5,0>: Cost 4 vsldoi12 LHS, <3,5,0,1>
+ 3376277671U, // <3,3,5,1>: Cost 4 vmrglw <3,4,3,5>, <3,0,3,1>
+ 3771617012U, // <3,3,5,2>: Cost 4 vsldoi8 <2,2,3,3>, <5,2,2,3>
+ 2302536092U, // <3,3,5,3>: Cost 3 vmrglw <3,4,3,5>, <3,3,3,3>
+ 3831130697U, // <3,3,5,4>: Cost 4 vsldoi12 LHS, <3,5,4,5>
+ 2294572579U, // <3,3,5,5>: Cost 3 vmrglw <2,1,3,5>, <2,1,3,5>
+ 2800519773U, // <3,3,5,6>: Cost 3 vsldoi12 LHS, <3,5,6,7>
+ 3368314810U, // <3,3,5,7>: Cost 4 vmrglw <2,1,3,5>, <2,6,3,7>
+ 2800519791U, // <3,3,5,u>: Cost 3 vsldoi12 LHS, <3,5,u,7>
+ 2800077432U, // <3,3,6,0>: Cost 3 vsldoi12 LHS, <3,6,0,7>
+ 3310291185U, // <3,3,6,1>: Cost 4 vmrghw <3,6,0,7>, <3,1,2,3>
+ 2789165706U, // <3,3,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <3,6,2,7>
+ 2764982931U, // <3,3,6,3>: Cost 3 vsldoi12 <2,2,3,3>, <3,6,3,7>
+ 2800077468U, // <3,3,6,4>: Cost 3 vsldoi12 LHS, <3,6,4,7>
+ 3873819301U, // <3,3,6,5>: Cost 4 vsldoi12 LHS, <3,6,5,7>
+ 2297235304U, // <3,3,6,6>: Cost 3 vmrglw <2,5,3,6>, <2,5,3,6>
+ 2725081963U, // <3,3,6,7>: Cost 3 vsldoi8 <6,7,3,3>, <6,7,3,3>
+ 2725745596U, // <3,3,6,u>: Cost 3 vsldoi8 <6,u,3,3>, <6,u,3,3>
+ 2631458918U, // <3,3,7,0>: Cost 3 vsldoi4 <2,3,3,7>, LHS
+ 3705201460U, // <3,3,7,1>: Cost 4 vsldoi4 <2,3,3,7>, <1,1,1,1>
+ 2631460551U, // <3,3,7,2>: Cost 3 vsldoi4 <2,3,3,7>, <2,3,3,7>
+ 2297906802U, // <3,3,7,3>: Cost 3 vmrglw <2,6,3,7>, <2,2,3,3>
+ 2631462198U, // <3,3,7,4>: Cost 3 vsldoi4 <2,3,3,7>, RHS
+ 3371648547U, // <3,3,7,5>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,5>
+ 3371648548U, // <3,3,7,6>: Cost 4 vmrglw <2,6,3,7>, <2,1,3,6>
+ 1224165306U, // <3,3,7,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1224165306U, // <3,3,7,u>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 1215480726U, // <3,3,u,0>: Cost 2 vmrglw <1,2,3,0>, <1,2,3,0>
+ 1679002398U, // <3,3,u,1>: Cost 2 vsldoi12 LHS, <3,u,1,2>
+ 1659967368U, // <3,3,u,2>: Cost 2 vsldoi8 <u,2,3,3>, <u,2,3,3>
+ 403488870U, // <3,3,u,3>: Cost 1 vspltisw3 LHS
+ 1563659574U, // <3,3,u,4>: Cost 2 vsldoi4 <3,3,3,3>, RHS
+ 1679002438U, // <3,3,u,5>: Cost 2 vsldoi12 LHS, <3,u,5,6>
+ 2756946764U, // <3,3,u,6>: Cost 3 vsldoi12 LHS, <3,u,6,3>
+ 1224165306U, // <3,3,u,7>: Cost 2 vmrglw <2,6,3,7>, <2,6,3,7>
+ 403488870U, // <3,3,u,u>: Cost 1 vspltisw3 LHS
+ 2691907584U, // <3,4,0,0>: Cost 3 vsldoi8 <1,2,3,4>, <0,0,0,0>
+ 1618165862U, // <3,4,0,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631476937U, // <3,4,0,2>: Cost 3 vsldoi4 <2,3,4,0>, <2,3,4,0>
+ 2232601732U, // <3,4,0,3>: Cost 3 vmrghw <3,0,1,2>, <4,3,5,0>
+ 2691907922U, // <3,4,0,4>: Cost 3 vsldoi8 <1,2,3,4>, <0,4,1,5>
+ 1158860086U, // <3,4,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 3306343806U, // <3,4,0,6>: Cost 4 vmrghw <3,0,1,2>, <4,6,5,7>
+ 3366947484U, // <3,4,0,7>: Cost 4 vmrglw <1,u,3,0>, <3,6,4,7>
+ 1618166429U, // <3,4,0,u>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 2631483494U, // <3,4,1,0>: Cost 3 vsldoi4 <2,3,4,1>, LHS
+ 2691908404U, // <3,4,1,1>: Cost 3 vsldoi8 <1,2,3,4>, <1,1,1,1>
+ 1618166682U, // <3,4,1,2>: Cost 2 vsldoi8 <1,2,3,4>, <1,2,3,4>
+ 3765650393U, // <3,4,1,3>: Cost 4 vsldoi8 <1,2,3,4>, <1,3,1,4>
+ 2631486774U, // <3,4,1,4>: Cost 3 vsldoi4 <2,3,4,1>, RHS
+ 2756946914U, // <3,4,1,5>: Cost 3 vsldoi12 LHS, <4,1,5,0>
+ 3765650639U, // <3,4,1,6>: Cost 4 vsldoi8 <1,2,3,4>, <1,6,1,7>
+ 3735090439U, // <3,4,1,7>: Cost 4 vsldoi4 <7,3,4,1>, <7,3,4,1>
+ 1622148480U, // <3,4,1,u>: Cost 2 vsldoi8 <1,u,3,4>, <1,u,3,4>
+ 3765650893U, // <3,4,2,0>: Cost 4 vsldoi8 <1,2,3,4>, <2,0,3,0>
+ 3831131154U, // <3,4,2,1>: Cost 4 vsldoi12 LHS, <4,2,1,3>
+ 2691909224U, // <3,4,2,2>: Cost 3 vsldoi8 <1,2,3,4>, <2,2,2,2>
+ 2691909286U, // <3,4,2,3>: Cost 3 vsldoi8 <1,2,3,4>, <2,3,0,1>
+ 2699208469U, // <3,4,2,4>: Cost 3 vsldoi8 <2,4,3,4>, <2,4,3,4>
+ 2233863478U, // <3,4,2,5>: Cost 3 vmrghw <3,2,0,3>, RHS
+ 2691909562U, // <3,4,2,6>: Cost 3 vsldoi8 <1,2,3,4>, <2,6,3,7>
+ 2701199368U, // <3,4,2,7>: Cost 3 vsldoi8 <2,7,3,4>, <2,7,3,4>
+ 2691909691U, // <3,4,2,u>: Cost 3 vsldoi8 <1,2,3,4>, <2,u,0,1>
+ 2691909782U, // <3,4,3,0>: Cost 3 vsldoi8 <1,2,3,4>, <3,0,1,2>
+ 3765651686U, // <3,4,3,1>: Cost 4 vsldoi8 <1,2,3,4>, <3,1,1,1>
+ 2691909972U, // <3,4,3,2>: Cost 3 vsldoi8 <1,2,3,4>, <3,2,4,3>
+ 2691910044U, // <3,4,3,3>: Cost 3 vsldoi8 <1,2,3,4>, <3,3,3,3>
+ 2691910096U, // <3,4,3,4>: Cost 3 vsldoi8 <1,2,3,4>, <3,4,0,1>
+ 1161006390U, // <3,4,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691910300U, // <3,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 3368962716U, // <3,4,3,7>: Cost 4 vmrglw <2,2,3,3>, <3,6,4,7>
+ 1161006633U, // <3,4,3,u>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2631508070U, // <3,4,4,0>: Cost 3 vsldoi4 <2,3,4,4>, LHS
+ 2631508890U, // <3,4,4,1>: Cost 3 vsldoi4 <2,3,4,4>, <1,2,3,4>
+ 2631509709U, // <3,4,4,2>: Cost 3 vsldoi4 <2,3,4,4>, <2,3,4,4>
+ 2289256788U, // <3,4,4,3>: Cost 3 vmrglw <1,2,3,4>, <3,2,4,3>
+ 1726336208U, // <3,4,4,4>: Cost 2 vsldoi12 LHS, <4,4,4,4>
+ 1618169142U, // <3,4,4,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 3362998858U, // <3,4,4,6>: Cost 4 vmrglw <1,2,3,4>, <3,5,4,6>
+ 2289257116U, // <3,4,4,7>: Cost 3 vmrglw <1,2,3,4>, <3,6,4,7>
+ 1618169385U, // <3,4,4,u>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 1557774438U, // <3,4,5,0>: Cost 2 vsldoi4 <2,3,4,5>, LHS
+ 2631516980U, // <3,4,5,1>: Cost 3 vsldoi4 <2,3,4,5>, <1,1,1,1>
+ 1557776078U, // <3,4,5,2>: Cost 2 vsldoi4 <2,3,4,5>, <2,3,4,5>
+ 2631518358U, // <3,4,5,3>: Cost 3 vsldoi4 <2,3,4,5>, <3,0,1,2>
+ 1557777718U, // <3,4,5,4>: Cost 2 vsldoi4 <2,3,4,5>, RHS
+ 2296563406U, // <3,4,5,5>: Cost 3 vmrglw <2,4,3,5>, <2,3,4,5>
+ 604818742U, // <3,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661381387U, // <3,4,5,7>: Cost 3 vsldoi4 <7,3,4,5>, <7,3,4,5>
+ 604818760U, // <3,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 3705266278U, // <3,4,6,0>: Cost 4 vsldoi4 <2,3,4,6>, LHS
+ 3831131482U, // <3,4,6,1>: Cost 4 vsldoi12 LHS, <4,6,1,7>
+ 2733715962U, // <3,4,6,2>: Cost 3 vsldoi8 <u,2,3,4>, <6,2,7,3>
+ 3844771180U, // <3,4,6,3>: Cost 4 vsldoi12 <3,2,4,3>, <4,6,3,7>
+ 2800078197U, // <3,4,6,4>: Cost 3 vsldoi12 LHS, <4,6,4,7>
+ 2236550454U, // <3,4,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716280U, // <3,4,6,6>: Cost 3 vsldoi8 <u,2,3,4>, <6,6,6,6>
+ 2725090156U, // <3,4,6,7>: Cost 3 vsldoi8 <6,7,3,4>, <6,7,3,4>
+ 2236550697U, // <3,4,6,u>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 2733716474U, // <3,4,7,0>: Cost 3 vsldoi8 <u,2,3,4>, <7,0,1,2>
+ 3371647013U, // <3,4,7,1>: Cost 4 vmrglw <2,6,3,7>, <0,0,4,1>
+ 2727744688U, // <3,4,7,2>: Cost 3 vsldoi8 <7,2,3,4>, <7,2,3,4>
+ 3371649364U, // <3,4,7,3>: Cost 4 vmrglw <2,6,3,7>, <3,2,4,3>
+ 2733716838U, // <3,4,7,4>: Cost 3 vsldoi8 <u,2,3,4>, <7,4,5,6>
+ 2297906894U, // <3,4,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,5>
+ 3371647180U, // <3,4,7,6>: Cost 4 vmrglw <2,6,3,7>, <0,2,4,6>
+ 2733717100U, // <3,4,7,7>: Cost 3 vsldoi8 <u,2,3,4>, <7,7,7,7>
+ 2297906897U, // <3,4,7,u>: Cost 3 vmrglw <2,6,3,7>, <2,3,4,u>
+ 1557799014U, // <3,4,u,0>: Cost 2 vsldoi4 <2,3,4,u>, LHS
+ 1618171694U, // <3,4,u,1>: Cost 2 vsldoi8 <1,2,3,4>, LHS
+ 1557800657U, // <3,4,u,2>: Cost 2 vsldoi4 <2,3,4,u>, <2,3,4,u>
+ 2691913660U, // <3,4,u,3>: Cost 3 vsldoi8 <1,2,3,4>, <u,3,0,1>
+ 1557802294U, // <3,4,u,4>: Cost 2 vsldoi4 <2,3,4,u>, RHS
+ 1618172058U, // <3,4,u,5>: Cost 2 vsldoi8 <1,2,3,4>, RHS
+ 604818985U, // <3,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2661405966U, // <3,4,u,7>: Cost 3 vsldoi4 <7,3,4,u>, <7,3,4,u>
+ 604819003U, // <3,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2643492966U, // <3,5,0,0>: Cost 3 vsldoi4 <4,3,5,0>, LHS
+ 2756947528U, // <3,5,0,1>: Cost 3 vsldoi12 LHS, <5,0,1,2>
+ 2331029019U, // <3,5,0,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2643495062U, // <3,5,0,3>: Cost 3 vsldoi4 <4,3,5,0>, <3,0,1,2>
+ 2756947554U, // <3,5,0,4>: Cost 3 vsldoi12 LHS, <5,0,4,1>
+ 2800078443U, // <3,5,0,5>: Cost 3 vsldoi12 LHS, <5,0,5,1>
+ 2289224194U, // <3,5,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,5,6>
+ 3362964723U, // <3,5,0,7>: Cost 4 vmrglw <1,2,3,0>, <1,6,5,7>
+ 2756947590U, // <3,5,0,u>: Cost 3 vsldoi12 LHS, <5,0,u,1>
+ 2800078479U, // <3,5,1,0>: Cost 3 vsldoi12 LHS, <5,1,0,1>
+ 2333027218U, // <3,5,1,1>: Cost 3 vmrglw <u,5,3,1>, <4,0,5,1>
+ 2691916699U, // <3,5,1,2>: Cost 3 vsldoi8 <1,2,3,5>, <1,2,3,5>
+ 3832901294U, // <3,5,1,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,1,3,5>
+ 2800078519U, // <3,5,1,4>: Cost 3 vsldoi12 LHS, <5,1,4,5>
+ 3830689467U, // <3,5,1,5>: Cost 4 vsldoi12 LHS, <5,1,5,0>
+ 3830689481U, // <3,5,1,6>: Cost 4 vsldoi12 LHS, <5,1,6,5>
+ 3873820365U, // <3,5,1,7>: Cost 4 vsldoi12 LHS, <5,1,7,0>
+ 2800078551U, // <3,5,1,u>: Cost 3 vsldoi12 LHS, <5,1,u,1>
+ 3770967487U, // <3,5,2,0>: Cost 4 vsldoi8 <2,1,3,5>, <2,0,1,4>
+ 2697225763U, // <3,5,2,1>: Cost 3 vsldoi8 <2,1,3,5>, <2,1,3,5>
+ 3830689523U, // <3,5,2,2>: Cost 4 vsldoi12 LHS, <5,2,2,2>
+ 2699216590U, // <3,5,2,3>: Cost 3 vsldoi8 <2,4,3,5>, <2,3,4,5>
+ 2699216662U, // <3,5,2,4>: Cost 3 vsldoi8 <2,4,3,5>, <2,4,3,5>
+ 2783047439U, // <3,5,2,5>: Cost 3 vsldoi12 <5,2,5,3>, <5,2,5,3>
+ 2783121176U, // <3,5,2,6>: Cost 3 vsldoi12 <5,2,6,3>, <5,2,6,3>
+ 3856936737U, // <3,5,2,7>: Cost 4 vsldoi12 <5,2,7,3>, <5,2,7,3>
+ 2701871194U, // <3,5,2,u>: Cost 3 vsldoi8 <2,u,3,5>, <2,u,3,5>
+ 2643517542U, // <3,5,3,0>: Cost 3 vsldoi4 <4,3,5,3>, LHS
+ 2331052946U, // <3,5,3,1>: Cost 3 vmrglw <u,2,3,3>, <4,0,5,1>
+ 3699345010U, // <3,5,3,2>: Cost 4 vsldoi4 <1,3,5,3>, <2,2,3,3>
+ 2705189276U, // <3,5,3,3>: Cost 3 vsldoi8 <3,4,3,5>, <3,3,3,3>
+ 2705189359U, // <3,5,3,4>: Cost 3 vsldoi8 <3,4,3,5>, <3,4,3,5>
+ 2331053274U, // <3,5,3,5>: Cost 3 vmrglw <u,2,3,3>, <4,4,5,5>
+ 2295220738U, // <3,5,3,6>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,6>
+ 3368961267U, // <3,5,3,7>: Cost 4 vmrglw <2,2,3,3>, <1,6,5,7>
+ 2295220740U, // <3,5,3,u>: Cost 3 vmrglw <2,2,3,3>, <3,4,5,u>
+ 2643525734U, // <3,5,4,0>: Cost 3 vsldoi4 <4,3,5,4>, LHS
+ 2331061138U, // <3,5,4,1>: Cost 3 vmrglw <u,2,3,4>, <4,0,5,1>
+ 2235584280U, // <3,5,4,2>: Cost 3 vmrghw <3,4,5,6>, <5,2,6,3>
+ 2643528194U, // <3,5,4,3>: Cost 3 vsldoi4 <4,3,5,4>, <3,4,5,6>
+ 2735713498U, // <3,5,4,4>: Cost 3 vsldoi8 <u,5,3,5>, <4,4,5,5>
+ 2756947892U, // <3,5,4,5>: Cost 3 vsldoi12 LHS, <5,4,5,6>
+ 2289256962U, // <3,5,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,5,6>
+ 3362997491U, // <3,5,4,7>: Cost 4 vmrglw <1,2,3,4>, <1,6,5,7>
+ 2756947919U, // <3,5,4,u>: Cost 3 vsldoi12 LHS, <5,4,u,6>
+ 2800078803U, // <3,5,5,0>: Cost 3 vsldoi12 LHS, <5,5,0,1>
+ 2800078812U, // <3,5,5,1>: Cost 3 vsldoi12 LHS, <5,5,1,1>
+ 2631591639U, // <3,5,5,2>: Cost 3 vsldoi4 <2,3,5,5>, <2,3,5,5>
+ 3832901616U, // <3,5,5,3>: Cost 4 vsldoi12 <1,2,5,3>, <5,5,3,3>
+ 2800078843U, // <3,5,5,4>: Cost 3 vsldoi12 LHS, <5,5,4,5>
+ 1726337028U, // <3,5,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078862U, // <3,5,5,6>: Cost 3 vsldoi12 LHS, <5,5,6,6>
+ 3368314099U, // <3,5,5,7>: Cost 4 vmrglw <2,1,3,5>, <1,6,5,7>
+ 1726337028U, // <3,5,5,u>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2800078884U, // <3,5,6,0>: Cost 3 vsldoi12 LHS, <5,6,0,1>
+ 2800078899U, // <3,5,6,1>: Cost 3 vsldoi12 LHS, <5,6,1,7>
+ 2631599832U, // <3,5,6,2>: Cost 3 vsldoi4 <2,3,5,6>, <2,3,5,6>
+ 2800078914U, // <3,5,6,3>: Cost 3 vsldoi12 LHS, <5,6,3,4>
+ 2800078924U, // <3,5,6,4>: Cost 3 vsldoi12 LHS, <5,6,4,5>
+ 2800078935U, // <3,5,6,5>: Cost 3 vsldoi12 LHS, <5,6,5,7>
+ 2297235970U, // <3,5,6,6>: Cost 3 vmrglw <2,5,3,6>, <3,4,5,6>
+ 1726337122U, // <3,5,6,7>: Cost 2 vsldoi12 LHS, <5,6,7,0>
+ 1726337131U, // <3,5,6,u>: Cost 2 vsldoi12 LHS, <5,6,u,0>
+ 3699376230U, // <3,5,7,0>: Cost 4 vsldoi4 <1,3,5,7>, LHS
+ 2333739922U, // <3,5,7,1>: Cost 3 vmrglw <u,6,3,7>, <4,0,5,1>
+ 3699378106U, // <3,5,7,2>: Cost 4 vsldoi4 <1,3,5,7>, <2,6,3,7>
+ 3371647915U, // <3,5,7,3>: Cost 4 vmrglw <2,6,3,7>, <1,2,5,3>
+ 3699379510U, // <3,5,7,4>: Cost 4 vsldoi4 <1,3,5,7>, RHS
+ 2333740250U, // <3,5,7,5>: Cost 3 vmrglw <u,6,3,7>, <4,4,5,5>
+ 2297907714U, // <3,5,7,6>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,6>
+ 3370984691U, // <3,5,7,7>: Cost 4 vmrglw <2,5,3,7>, <1,6,5,7>
+ 2297907716U, // <3,5,7,u>: Cost 3 vmrglw <2,6,3,7>, <3,4,5,u>
+ 2800079046U, // <3,5,u,0>: Cost 3 vsldoi12 LHS, <5,u,0,1>
+ 2756948176U, // <3,5,u,1>: Cost 3 vsldoi12 LHS, <5,u,1,2>
+ 2331029019U, // <3,5,u,2>: Cost 3 vmrglw <u,2,3,0>, <4,u,5,2>
+ 2800079076U, // <3,5,u,3>: Cost 3 vsldoi12 LHS, <5,u,3,4>
+ 2800079085U, // <3,5,u,4>: Cost 3 vsldoi12 LHS, <5,u,4,4>
+ 1726337028U, // <3,5,u,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 2289289730U, // <3,5,u,6>: Cost 3 vmrglw <1,2,3,u>, <3,4,5,6>
+ 1726337284U, // <3,5,u,7>: Cost 2 vsldoi12 LHS, <5,u,7,0>
+ 1726337293U, // <3,5,u,u>: Cost 2 vsldoi12 LHS, <5,u,u,0>
+ 3773628416U, // <3,6,0,0>: Cost 4 vsldoi8 <2,5,3,6>, <0,0,0,0>
+ 2699886694U, // <3,6,0,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789167401U, // <3,6,0,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,0,2,1>
+ 3362965862U, // <3,6,0,3>: Cost 4 vmrglw <1,2,3,0>, <3,2,6,3>
+ 3773628754U, // <3,6,0,4>: Cost 4 vsldoi8 <2,5,3,6>, <0,4,1,5>
+ 3723284326U, // <3,6,0,5>: Cost 4 vsldoi4 <5,3,6,0>, <5,3,6,0>
+ 2800079181U, // <3,6,0,6>: Cost 3 vsldoi12 LHS, <6,0,6,1>
+ 1215483190U, // <3,6,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1215483191U, // <3,6,0,u>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 3873821032U, // <3,6,1,0>: Cost 4 vsldoi12 LHS, <6,1,0,1>
+ 3773629236U, // <3,6,1,1>: Cost 4 vsldoi8 <2,5,3,6>, <1,1,1,1>
+ 2691924892U, // <3,6,1,2>: Cost 3 vsldoi8 <1,2,3,6>, <1,2,3,6>
+ 3830690184U, // <3,6,1,3>: Cost 5 vsldoi12 LHS, <6,1,3,6>
+ 3873821072U, // <3,6,1,4>: Cost 4 vsldoi12 LHS, <6,1,4,5>
+ 3873821082U, // <3,6,1,5>: Cost 4 vsldoi12 LHS, <6,1,5,6>
+ 3403453240U, // <3,6,1,6>: Cost 4 vmrglw <u,0,3,1>, <6,6,6,6>
+ 2289233206U, // <3,6,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2289233207U, // <3,6,1,u>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 2661498982U, // <3,6,2,0>: Cost 3 vsldoi4 <7,3,6,2>, LHS
+ 3770975780U, // <3,6,2,1>: Cost 4 vsldoi8 <2,1,3,6>, <2,1,3,6>
+ 2631640797U, // <3,6,2,2>: Cost 3 vsldoi4 <2,3,6,2>, <2,3,6,2>
+ 3771639485U, // <3,6,2,3>: Cost 4 vsldoi8 <2,2,3,6>, <2,3,2,6>
+ 2661502262U, // <3,6,2,4>: Cost 3 vsldoi4 <7,3,6,2>, RHS
+ 2699888488U, // <3,6,2,5>: Cost 3 vsldoi8 <2,5,3,6>, <2,5,3,6>
+ 2661503482U, // <3,6,2,6>: Cost 3 vsldoi4 <7,3,6,2>, <6,2,7,3>
+ 1715425786U, // <3,6,2,7>: Cost 2 vsldoi12 <6,2,7,3>, <6,2,7,3>
+ 1715499523U, // <3,6,2,u>: Cost 2 vsldoi12 <6,2,u,3>, <6,2,u,3>
+ 3773630614U, // <3,6,3,0>: Cost 4 vsldoi8 <2,5,3,6>, <3,0,1,2>
+ 3372942825U, // <3,6,3,1>: Cost 4 vmrglw <2,u,3,3>, <2,0,6,1>
+ 2234749434U, // <3,6,3,2>: Cost 3 vmrghw <3,3,3,3>, <6,2,7,3>
+ 3368962406U, // <3,6,3,3>: Cost 4 vmrglw <2,2,3,3>, <3,2,6,3>
+ 2699889154U, // <3,6,3,4>: Cost 3 vsldoi8 <2,5,3,6>, <3,4,5,6>
+ 3773631068U, // <3,6,3,5>: Cost 4 vsldoi8 <2,5,3,6>, <3,5,6,6>
+ 2331054904U, // <3,6,3,6>: Cost 3 vmrglw <u,2,3,3>, <6,6,6,6>
+ 1221479734U, // <3,6,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 1221479735U, // <3,6,3,u>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 2235584801U, // <3,6,4,0>: Cost 3 vmrghw <3,4,5,6>, <6,0,1,2>
+ 3717342106U, // <3,6,4,1>: Cost 4 vsldoi4 <4,3,6,4>, <1,2,3,4>
+ 2789167729U, // <3,6,4,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,4,2,5>
+ 2235585074U, // <3,6,4,3>: Cost 3 vmrghw <3,4,5,6>, <6,3,4,5>
+ 2235585165U, // <3,6,4,4>: Cost 3 vmrghw <3,4,5,6>, <6,4,5,6>
+ 2699889974U, // <3,6,4,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 2800079509U, // <3,6,4,6>: Cost 3 vsldoi12 LHS, <6,4,6,5>
+ 1215515958U, // <3,6,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1215515959U, // <3,6,4,u>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 3873821356U, // <3,6,5,0>: Cost 4 vsldoi12 LHS, <6,5,0,1>
+ 3372959209U, // <3,6,5,1>: Cost 5 vmrglw <2,u,3,5>, <2,0,6,1>
+ 3862909629U, // <3,6,5,2>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,2,0>
+ 3773632358U, // <3,6,5,3>: Cost 4 vsldoi8 <2,5,3,6>, <5,3,6,0>
+ 3873821396U, // <3,6,5,4>: Cost 4 vsldoi12 LHS, <6,5,4,5>
+ 3873821405U, // <3,6,5,5>: Cost 4 vsldoi12 LHS, <6,5,5,5>
+ 3862909672U, // <3,6,5,6>: Cost 4 vsldoi12 <6,2,7,3>, <6,5,6,7>
+ 2294574390U, // <3,6,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2294574391U, // <3,6,5,u>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 2800079613U, // <3,6,6,0>: Cost 3 vsldoi12 LHS, <6,6,0,1>
+ 3873821446U, // <3,6,6,1>: Cost 4 vsldoi12 LHS, <6,6,1,1>
+ 2789167888U, // <3,6,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,6,2,2>
+ 3844920090U, // <3,6,6,3>: Cost 4 vsldoi12 <3,2,6,3>, <6,6,3,3>
+ 2800079653U, // <3,6,6,4>: Cost 3 vsldoi12 LHS, <6,6,4,5>
+ 3723333484U, // <3,6,6,5>: Cost 4 vsldoi4 <5,3,6,6>, <5,3,6,6>
+ 1726337848U, // <3,6,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726337858U, // <3,6,6,7>: Cost 2 vsldoi12 LHS, <6,6,7,7>
+ 1726337867U, // <3,6,6,u>: Cost 2 vsldoi12 LHS, <6,6,u,7>
+ 1726337870U, // <3,6,7,0>: Cost 2 vsldoi12 LHS, <6,7,0,1>
+ 2297906665U, // <3,6,7,1>: Cost 3 vmrglw <2,6,3,7>, <2,0,6,1>
+ 2792117090U, // <3,6,7,2>: Cost 3 vsldoi12 <6,7,2,3>, <6,7,2,3>
+ 2297907558U, // <3,6,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,6,3>
+ 1726337910U, // <3,6,7,4>: Cost 2 vsldoi12 LHS, <6,7,4,5>
+ 2297906993U, // <3,6,7,5>: Cost 3 vmrglw <2,6,3,7>, <2,4,6,5>
+ 2297906832U, // <3,6,7,6>: Cost 3 vmrglw <2,6,3,7>, <2,2,6,6>
+ 1224166710U, // <3,6,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224166711U, // <3,6,7,u>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1726337951U, // <3,6,u,0>: Cost 2 vsldoi12 LHS, <6,u,0,1>
+ 2699892526U, // <3,6,u,1>: Cost 3 vsldoi8 <2,5,3,6>, LHS
+ 2789168049U, // <3,6,u,2>: Cost 3 vsldoi12 <6,2,7,3>, <6,u,2,1>
+ 2792854460U, // <3,6,u,3>: Cost 3 vsldoi12 <6,u,3,3>, <6,u,3,3>
+ 1726337991U, // <3,6,u,4>: Cost 2 vsldoi12 LHS, <6,u,4,5>
+ 2699892890U, // <3,6,u,5>: Cost 3 vsldoi8 <2,5,3,6>, RHS
+ 1726337848U, // <3,6,u,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1215548726U, // <3,6,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 1215548727U, // <3,6,u,u>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 2700558336U, // <3,7,0,0>: Cost 3 vsldoi8 <2,6,3,7>, <0,0,0,0>
+ 1626816614U, // <3,7,0,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700558513U, // <3,7,0,2>: Cost 3 vsldoi8 <2,6,3,7>, <0,2,1,6>
+ 2331030010U, // <3,7,0,3>: Cost 3 vmrglw <u,2,3,0>, <6,2,7,3>
+ 2700558674U, // <3,7,0,4>: Cost 3 vsldoi8 <2,6,3,7>, <0,4,1,5>
+ 2800079906U, // <3,7,0,5>: Cost 3 vsldoi12 LHS, <7,0,5,6>
+ 2655588936U, // <3,7,0,6>: Cost 3 vsldoi4 <6,3,7,0>, <6,3,7,0>
+ 2800079919U, // <3,7,0,7>: Cost 3 vsldoi12 LHS, <7,0,7,1>
+ 1626817181U, // <3,7,0,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 3774300899U, // <3,7,1,0>: Cost 4 vsldoi8 <2,6,3,7>, <1,0,1,1>
+ 2700559156U, // <3,7,1,1>: Cost 3 vsldoi8 <2,6,3,7>, <1,1,1,1>
+ 2700559254U, // <3,7,1,2>: Cost 3 vsldoi8 <2,6,3,7>, <1,2,3,0>
+ 3774301148U, // <3,7,1,3>: Cost 4 vsldoi8 <2,6,3,7>, <1,3,1,7>
+ 3774301227U, // <3,7,1,4>: Cost 4 vsldoi8 <2,6,3,7>, <1,4,1,5>
+ 3774301295U, // <3,7,1,5>: Cost 4 vsldoi8 <2,6,3,7>, <1,5,0,1>
+ 3768329441U, // <3,7,1,6>: Cost 4 vsldoi8 <1,6,3,7>, <1,6,3,7>
+ 3403453250U, // <3,7,1,7>: Cost 4 vmrglw <u,0,3,1>, <6,6,7,7>
+ 2700559740U, // <3,7,1,u>: Cost 3 vsldoi8 <2,6,3,7>, <1,u,3,0>
+ 2700559849U, // <3,7,2,0>: Cost 3 vsldoi8 <2,6,3,7>, <2,0,6,1>
+ 3770983973U, // <3,7,2,1>: Cost 4 vsldoi8 <2,1,3,7>, <2,1,3,7>
+ 2700559976U, // <3,7,2,2>: Cost 3 vsldoi8 <2,6,3,7>, <2,2,2,2>
+ 2698569415U, // <3,7,2,3>: Cost 3 vsldoi8 <2,3,3,7>, <2,3,3,7>
+ 2700560177U, // <3,7,2,4>: Cost 3 vsldoi8 <2,6,3,7>, <2,4,6,5>
+ 3773638505U, // <3,7,2,5>: Cost 4 vsldoi8 <2,5,3,7>, <2,5,3,7>
+ 1626818490U, // <3,7,2,6>: Cost 2 vsldoi8 <2,6,3,7>, <2,6,3,7>
+ 2795140307U, // <3,7,2,7>: Cost 3 vsldoi12 <7,2,7,3>, <7,2,7,3>
+ 1628145756U, // <3,7,2,u>: Cost 2 vsldoi8 <2,u,3,7>, <2,u,3,7>
+ 2700560534U, // <3,7,3,0>: Cost 3 vsldoi8 <2,6,3,7>, <3,0,1,2>
+ 3774302438U, // <3,7,3,1>: Cost 4 vsldoi8 <2,6,3,7>, <3,1,1,1>
+ 2700560742U, // <3,7,3,2>: Cost 3 vsldoi8 <2,6,3,7>, <3,2,6,3>
+ 2700560796U, // <3,7,3,3>: Cost 3 vsldoi8 <2,6,3,7>, <3,3,3,3>
+ 2700560898U, // <3,7,3,4>: Cost 3 vsldoi8 <2,6,3,7>, <3,4,5,6>
+ 3774302821U, // <3,7,3,5>: Cost 4 vsldoi8 <2,6,3,7>, <3,5,7,6>
+ 2700561079U, // <3,7,3,6>: Cost 3 vsldoi8 <2,6,3,7>, <3,6,7,7>
+ 2700561091U, // <3,7,3,7>: Cost 3 vsldoi8 <2,6,3,7>, <3,7,0,1>
+ 2700561182U, // <3,7,3,u>: Cost 3 vsldoi8 <2,6,3,7>, <3,u,1,2>
+ 2655617126U, // <3,7,4,0>: Cost 3 vsldoi4 <6,3,7,4>, LHS
+ 3774303178U, // <3,7,4,1>: Cost 4 vsldoi8 <2,6,3,7>, <4,1,2,3>
+ 2655619002U, // <3,7,4,2>: Cost 3 vsldoi4 <6,3,7,4>, <2,6,3,7>
+ 2331062778U, // <3,7,4,3>: Cost 3 vmrglw <u,2,3,4>, <6,2,7,3>
+ 2655620406U, // <3,7,4,4>: Cost 3 vsldoi4 <6,3,7,4>, RHS
+ 1626819894U, // <3,7,4,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 2655621708U, // <3,7,4,6>: Cost 3 vsldoi4 <6,3,7,4>, <6,3,7,4>
+ 2800080247U, // <3,7,4,7>: Cost 3 vsldoi12 LHS, <7,4,7,5>
+ 1626820137U, // <3,7,4,u>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 3774303816U, // <3,7,5,0>: Cost 4 vsldoi8 <2,6,3,7>, <5,0,1,2>
+ 3873822093U, // <3,7,5,1>: Cost 4 vsldoi12 LHS, <7,5,1,0>
+ 3774303998U, // <3,7,5,2>: Cost 4 vsldoi8 <2,6,3,7>, <5,2,3,4>
+ 3862910368U, // <3,7,5,3>: Cost 4 vsldoi12 <6,2,7,3>, <7,5,3,1>
+ 3774304180U, // <3,7,5,4>: Cost 4 vsldoi8 <2,6,3,7>, <5,4,5,6>
+ 2800080310U, // <3,7,5,5>: Cost 3 vsldoi12 LHS, <7,5,5,5>
+ 2800080321U, // <3,7,5,6>: Cost 3 vsldoi12 LHS, <7,5,6,7>
+ 3873822147U, // <3,7,5,7>: Cost 4 vsldoi12 LHS, <7,5,7,0>
+ 2800080339U, // <3,7,5,u>: Cost 3 vsldoi12 LHS, <7,5,u,7>
+ 2800080348U, // <3,7,6,0>: Cost 3 vsldoi12 LHS, <7,6,0,7>
+ 3873822181U, // <3,7,6,1>: Cost 4 vsldoi12 LHS, <7,6,1,7>
+ 2789168622U, // <3,7,6,2>: Cost 3 vsldoi12 <6,2,7,3>, <7,6,2,7>
+ 2700563016U, // <3,7,6,3>: Cost 3 vsldoi8 <2,6,3,7>, <6,3,7,0>
+ 2800080384U, // <3,7,6,4>: Cost 3 vsldoi12 LHS, <7,6,4,7>
+ 3862910472U, // <3,7,6,5>: Cost 4 vsldoi12 <6,2,7,3>, <7,6,5,6>
+ 2700563256U, // <3,7,6,6>: Cost 3 vsldoi8 <2,6,3,7>, <6,6,6,6>
+ 2800080404U, // <3,7,6,7>: Cost 3 vsldoi12 LHS, <7,6,7,0>
+ 2793149988U, // <3,7,6,u>: Cost 3 vsldoi12 <6,u,7,3>, <7,6,u,7>
+ 2637725798U, // <3,7,7,0>: Cost 3 vsldoi4 <3,3,7,7>, LHS
+ 3371649227U, // <3,7,7,1>: Cost 4 vmrglw <2,6,3,7>, <3,0,7,1>
+ 2637727674U, // <3,7,7,2>: Cost 3 vsldoi4 <3,3,7,7>, <2,6,3,7>
+ 2297907567U, // <3,7,7,3>: Cost 3 vmrglw <2,6,3,7>, <3,2,7,3>
+ 2637729078U, // <3,7,7,4>: Cost 3 vsldoi4 <3,3,7,7>, RHS
+ 3371649312U, // <3,7,7,5>: Cost 4 vmrglw <2,6,3,7>, <3,1,7,5>
+ 2655646287U, // <3,7,7,6>: Cost 3 vsldoi4 <6,3,7,7>, <6,3,7,7>
+ 1726338668U, // <3,7,7,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1726338668U, // <3,7,7,u>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 2700564179U, // <3,7,u,0>: Cost 3 vsldoi8 <2,6,3,7>, <u,0,1,2>
+ 1626822446U, // <3,7,u,1>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 2700564357U, // <3,7,u,2>: Cost 3 vsldoi8 <2,6,3,7>, <u,2,3,0>
+ 2700564412U, // <3,7,u,3>: Cost 3 vsldoi8 <2,6,3,7>, <u,3,0,1>
+ 2700564543U, // <3,7,u,4>: Cost 3 vsldoi8 <2,6,3,7>, <u,4,5,6>
+ 1626822810U, // <3,7,u,5>: Cost 2 vsldoi8 <2,6,3,7>, RHS
+ 1662654672U, // <3,7,u,6>: Cost 2 vsldoi8 <u,6,3,7>, <u,6,3,7>
+ 1726338668U, // <3,7,u,7>: Cost 2 vsldoi12 LHS, <7,7,7,7>
+ 1626823013U, // <3,7,u,u>: Cost 2 vsldoi8 <2,6,3,7>, LHS
+ 1678557184U, // <3,u,0,0>: Cost 2 vsldoi12 LHS, <0,0,0,0>
+ 1679005395U, // <3,u,0,1>: Cost 2 vsldoi12 LHS, <u,0,1,2>
+ 2289221787U, // <3,u,0,2>: Cost 3 vmrglw <1,2,3,0>, <0,1,u,2>
+ 1215479964U, // <3,u,0,3>: Cost 2 vmrglw <1,2,3,0>, LHS
+ 2752747245U, // <3,u,0,4>: Cost 3 vsldoi12 LHS, <u,0,4,1>
+ 1158863002U, // <3,u,0,5>: Cost 2 vmrghw <3,0,1,2>, RHS
+ 2289224221U, // <3,u,0,6>: Cost 3 vmrglw <1,2,3,0>, <3,4,u,6>
+ 1215483208U, // <3,u,0,7>: Cost 2 vmrglw <1,2,3,0>, RHS
+ 1679005458U, // <3,u,0,u>: Cost 2 vsldoi12 LHS, <u,0,u,2>
+ 1558036582U, // <3,u,1,0>: Cost 2 vsldoi4 <2,3,u,1>, LHS
+ 1678558004U, // <3,u,1,1>: Cost 2 vsldoi12 LHS, <1,1,1,1>
+ 604821294U, // <3,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2752747317U, // <3,u,1,3>: Cost 3 vsldoi12 LHS, <u,1,3,1>
+ 1558039862U, // <3,u,1,4>: Cost 2 vsldoi4 <2,3,u,1>, RHS
+ 2756949830U, // <3,u,1,5>: Cost 3 vsldoi12 LHS, <u,1,5,0>
+ 2800080726U, // <3,u,1,6>: Cost 3 vsldoi12 LHS, <u,1,6,7>
+ 2289233224U, // <3,u,1,7>: Cost 3 vmrglw <1,2,3,1>, RHS
+ 604821348U, // <3,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696586709U, // <3,u,2,0>: Cost 3 vsldoi8 <2,0,3,u>, <2,0,3,u>
+ 2757392246U, // <3,u,2,1>: Cost 3 vsldoi12 LHS, <u,2,1,3>
+ 1624172151U, // <3,u,2,2>: Cost 2 vsldoi8 <2,2,3,u>, <2,2,3,u>
+ 1679005576U, // <3,u,2,3>: Cost 2 vsldoi12 LHS, <u,2,3,3>
+ 2631789878U, // <3,u,2,4>: Cost 3 vsldoi4 <2,3,u,2>, RHS
+ 2699904874U, // <3,u,2,5>: Cost 3 vsldoi8 <2,5,3,u>, <2,5,3,u>
+ 1626826683U, // <3,u,2,6>: Cost 2 vsldoi8 <2,6,3,u>, <2,6,3,u>
+ 1726338988U, // <3,u,2,7>: Cost 2 vsldoi12 LHS, <u,2,7,3>
+ 1683208117U, // <3,u,2,u>: Cost 2 vsldoi12 LHS, <u,2,u,3>
+ 1679005628U, // <3,u,3,0>: Cost 2 vsldoi12 LHS, <u,3,0,1>
+ 1161008942U, // <3,u,3,1>: Cost 2 vmrghw <3,3,3,3>, LHS
+ 2752747471U, // <3,u,3,2>: Cost 3 vsldoi12 LHS, <u,3,2,2>
+ 403488870U, // <3,u,3,3>: Cost 1 vspltisw3 LHS
+ 1679005668U, // <3,u,3,4>: Cost 2 vsldoi12 LHS, <u,3,4,5>
+ 1161009306U, // <3,u,3,5>: Cost 2 vmrghw <3,3,3,3>, RHS
+ 2691943104U, // <3,u,3,6>: Cost 3 vsldoi8 <1,2,3,u>, <3,6,u,7>
+ 1221479752U, // <3,u,3,7>: Cost 2 vmrglw <2,2,3,3>, RHS
+ 403488870U, // <3,u,3,u>: Cost 1 vspltisw3 LHS
+ 2289255363U, // <3,u,4,0>: Cost 3 vmrglw <1,2,3,4>, <1,2,u,0>
+ 1161844526U, // <3,u,4,1>: Cost 2 vmrghw <3,4,5,6>, LHS
+ 2289256661U, // <3,u,4,2>: Cost 3 vmrglw <1,2,3,4>, <3,0,u,2>
+ 1215512732U, // <3,u,4,3>: Cost 2 vmrglw <1,2,3,4>, LHS
+ 1215513498U, // <3,u,4,4>: Cost 2 vmrglw <1,2,3,4>, <1,2,3,4>
+ 1679005759U, // <3,u,4,5>: Cost 2 vsldoi12 LHS, <u,4,5,6>
+ 2289256989U, // <3,u,4,6>: Cost 3 vmrglw <1,2,3,4>, <3,4,u,6>
+ 1215515976U, // <3,u,4,7>: Cost 2 vmrglw <1,2,3,4>, RHS
+ 1679005786U, // <3,u,4,u>: Cost 2 vsldoi12 LHS, <u,4,u,6>
+ 1558069350U, // <3,u,5,0>: Cost 2 vsldoi4 <2,3,u,5>, LHS
+ 2631811892U, // <3,u,5,1>: Cost 3 vsldoi4 <2,3,u,5>, <1,1,1,1>
+ 1558071026U, // <3,u,5,2>: Cost 2 vsldoi4 <2,3,u,5>, <2,3,u,5>
+ 2752747646U, // <3,u,5,3>: Cost 3 vsldoi12 LHS, <u,5,3,6>
+ 1558072630U, // <3,u,5,4>: Cost 2 vsldoi4 <2,3,u,5>, RHS
+ 1726337028U, // <3,u,5,5>: Cost 2 vsldoi12 LHS, <5,5,5,5>
+ 604821658U, // <3,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2294574408U, // <3,u,5,7>: Cost 3 vmrglw <2,1,3,5>, RHS
+ 604821676U, // <3,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2631819366U, // <3,u,6,0>: Cost 3 vsldoi4 <2,3,u,6>, LHS
+ 2757392574U, // <3,u,6,1>: Cost 3 vsldoi12 LHS, <u,6,1,7>
+ 2631821043U, // <3,u,6,2>: Cost 3 vsldoi4 <2,3,u,6>, <2,3,u,6>
+ 1679005904U, // <3,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 2631822646U, // <3,u,6,4>: Cost 3 vsldoi4 <2,3,u,6>, RHS
+ 2236553370U, // <3,u,6,5>: Cost 3 vmrghw <3,6,0,7>, RHS
+ 1726337848U, // <3,u,6,6>: Cost 2 vsldoi12 LHS, <6,6,6,6>
+ 1726339309U, // <3,u,6,7>: Cost 2 vsldoi12 LHS, <u,6,7,0>
+ 1683208445U, // <3,u,6,u>: Cost 2 vsldoi12 LHS, <u,6,u,7>
+ 1726339328U, // <3,u,7,0>: Cost 2 vsldoi12 LHS, <u,7,0,1>
+ 2297905225U, // <3,u,7,1>: Cost 3 vmrglw <2,6,3,7>, <0,0,u,1>
+ 2631829236U, // <3,u,7,2>: Cost 3 vsldoi4 <2,3,u,7>, <2,3,u,7>
+ 1224163484U, // <3,u,7,3>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1726339368U, // <3,u,7,4>: Cost 2 vsldoi12 LHS, <u,7,4,5>
+ 2297905553U, // <3,u,7,5>: Cost 3 vmrglw <2,6,3,7>, <0,4,u,5>
+ 2297905392U, // <3,u,7,6>: Cost 3 vmrglw <2,6,3,7>, <0,2,u,6>
+ 1224166728U, // <3,u,7,7>: Cost 2 vmrglw <2,6,3,7>, RHS
+ 1224163489U, // <3,u,7,u>: Cost 2 vmrglw <2,6,3,7>, LHS
+ 1683208529U, // <3,u,u,0>: Cost 2 vsldoi12 LHS, <u,u,0,1>
+ 1679006043U, // <3,u,u,1>: Cost 2 vsldoi12 LHS, <u,u,1,2>
+ 604821861U, // <3,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 403488870U, // <3,u,u,3>: Cost 1 vspltisw3 LHS
+ 1683208569U, // <3,u,u,4>: Cost 2 vsldoi12 LHS, <u,u,4,5>
+ 1679006083U, // <3,u,u,5>: Cost 2 vsldoi12 LHS, <u,u,5,6>
+ 604821901U, // <3,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 1215548744U, // <3,u,u,7>: Cost 2 vmrglw <1,2,3,u>, RHS
+ 604821915U, // <3,u,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 2759016448U, // <4,0,0,0>: Cost 3 vsldoi12 <1,2,3,4>, <0,0,0,0>
+ 1165115494U, // <4,0,0,1>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 3717531337U, // <4,0,0,2>: Cost 4 vsldoi4 <4,4,0,0>, <2,3,4,0>
+ 3369675785U, // <4,0,0,3>: Cost 4 vmrglw <2,3,4,0>, <4,2,0,3>
+ 2751791144U, // <4,0,0,4>: Cost 3 vsldoi12 <0,0,4,4>, <0,0,4,4>
+ 2238857630U, // <4,0,0,5>: Cost 3 vmrghw <4,0,5,1>, <0,5,1,0>
+ 3312591341U, // <4,0,0,6>: Cost 4 vmrghw <4,0,5,0>, <0,6,0,7>
+ 3369676113U, // <4,0,0,7>: Cost 4 vmrglw <2,3,4,0>, <4,6,0,7>
+ 1165116061U, // <4,0,0,u>: Cost 2 vmrghw <4,0,5,1>, LHS
+ 2637824102U, // <4,0,1,0>: Cost 3 vsldoi4 <3,4,0,1>, LHS
+ 2637824922U, // <4,0,1,1>: Cost 3 vsldoi4 <3,4,0,1>, <1,2,3,4>
+ 1685274726U, // <4,0,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637826512U, // <4,0,1,3>: Cost 3 vsldoi4 <3,4,0,1>, <3,4,0,1>
+ 2637827382U, // <4,0,1,4>: Cost 3 vsldoi4 <3,4,0,1>, RHS
+ 2661716070U, // <4,0,1,5>: Cost 3 vsldoi4 <7,4,0,1>, <5,6,7,4>
+ 3729486427U, // <4,0,1,6>: Cost 4 vsldoi4 <6,4,0,1>, <6,4,0,1>
+ 2661717300U, // <4,0,1,7>: Cost 3 vsldoi4 <7,4,0,1>, <7,4,0,1>
+ 1685274780U, // <4,0,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 3711574118U, // <4,0,2,0>: Cost 4 vsldoi4 <3,4,0,2>, LHS
+ 2240200806U, // <4,0,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 3771663992U, // <4,0,2,2>: Cost 4 vsldoi8 <2,2,4,0>, <2,2,4,0>
+ 2698585801U, // <4,0,2,3>: Cost 3 vsldoi8 <2,3,4,0>, <2,3,4,0>
+ 3373672105U, // <4,0,2,4>: Cost 4 vmrglw <3,0,4,2>, <2,3,0,4>
+ 3810813795U, // <4,0,2,5>: Cost 4 vsldoi8 <u,7,4,0>, <2,5,3,1>
+ 3772327866U, // <4,0,2,6>: Cost 4 vsldoi8 <2,3,4,0>, <2,6,3,7>
+ 3386280568U, // <4,0,2,7>: Cost 5 vmrglw <5,1,4,2>, <3,6,0,7>
+ 2701903966U, // <4,0,2,u>: Cost 3 vsldoi8 <2,u,4,0>, <2,u,4,0>
+ 3699638374U, // <4,0,3,0>: Cost 4 vsldoi4 <1,4,0,3>, LHS
+ 2753560832U, // <4,0,3,1>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 3772328276U, // <4,0,3,2>: Cost 4 vsldoi8 <2,3,4,0>, <3,2,4,3>
+ 3827302674U, // <4,0,3,3>: Cost 4 vsldoi12 <0,3,1,4>, <0,3,3,4>
+ 3699641654U, // <4,0,3,4>: Cost 4 vsldoi4 <1,4,0,3>, RHS
+ 3779627588U, // <4,0,3,5>: Cost 4 vsldoi8 <3,5,4,0>, <3,5,4,0>
+ 3772328604U, // <4,0,3,6>: Cost 4 vsldoi8 <2,3,4,0>, <3,6,4,7>
+ 3780954854U, // <4,0,3,7>: Cost 4 vsldoi8 <3,7,4,0>, <3,7,4,0>
+ 2753560832U, // <4,0,3,u>: Cost 3 vsldoi12 <0,3,1,4>, <0,3,1,4>
+ 2725129106U, // <4,0,4,0>: Cost 3 vsldoi8 <6,7,4,0>, <4,0,5,1>
+ 1167720550U, // <4,0,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 3839172953U, // <4,0,4,2>: Cost 4 vsldoi12 <2,3,0,4>, <0,4,2,3>
+ 3772329051U, // <4,0,4,3>: Cost 4 vsldoi8 <2,3,4,0>, <4,3,0,4>
+ 2241462610U, // <4,0,4,4>: Cost 3 vmrghw <4,4,4,4>, <0,4,1,5>
+ 2698587446U, // <4,0,4,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 3772329297U, // <4,0,4,6>: Cost 4 vsldoi8 <2,3,4,0>, <4,6,0,7>
+ 3735483703U, // <4,0,4,7>: Cost 4 vsldoi4 <7,4,0,4>, <7,4,0,4>
+ 1167721117U, // <4,0,4,u>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 1168556032U, // <4,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 94814310U, // <4,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2242298029U, // <4,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2637859284U, // <4,0,5,3>: Cost 3 vsldoi4 <3,4,0,5>, <3,4,0,5>
+ 1168556370U, // <4,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2242306530U, // <4,0,5,5>: Cost 3 vmrghw RHS, <0,5,u,5>
+ 2242298358U, // <4,0,5,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661750072U, // <4,0,5,7>: Cost 3 vsldoi4 <7,4,0,5>, <7,4,0,5>
+ 94814877U, // <4,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 3316580362U, // <4,0,6,0>: Cost 4 vmrghw <4,6,5,1>, <0,0,1,1>
+ 2242846822U, // <4,0,6,1>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3798872570U, // <4,0,6,2>: Cost 4 vsldoi8 <6,7,4,0>, <6,2,7,3>
+ 3796218413U, // <4,0,6,3>: Cost 4 vsldoi8 <6,3,4,0>, <6,3,4,0>
+ 3834528273U, // <4,0,6,4>: Cost 4 vsldoi12 <1,5,0,4>, <0,6,4,7>
+ 3798872811U, // <4,0,6,5>: Cost 4 vsldoi8 <6,7,4,0>, <6,5,7,1>
+ 3316621876U, // <4,0,6,6>: Cost 4 vmrghw <4,6,5,6>, <0,6,u,6>
+ 2725131121U, // <4,0,6,7>: Cost 3 vsldoi8 <6,7,4,0>, <6,7,4,0>
+ 2242847389U, // <4,0,6,u>: Cost 3 vmrghw <4,6,5,2>, LHS
+ 3377692672U, // <4,0,7,0>: Cost 4 vmrglw <3,6,4,7>, <0,0,0,0>
+ 2243493990U, // <4,0,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 3775648970U, // <4,0,7,2>: Cost 5 vsldoi8 <2,u,4,0>, <7,2,6,3>
+ 3802191110U, // <4,0,7,3>: Cost 4 vsldoi8 <7,3,4,0>, <7,3,4,0>
+ 3317236050U, // <4,0,7,4>: Cost 4 vmrghw <4,7,5,0>, <0,4,1,5>
+ 3803518376U, // <4,0,7,5>: Cost 4 vsldoi8 <7,5,4,0>, <7,5,4,0>
+ 3317236214U, // <4,0,7,6>: Cost 5 vmrghw <4,7,5,0>, <0,6,1,7>
+ 3798873708U, // <4,0,7,7>: Cost 4 vsldoi8 <6,7,4,0>, <7,7,7,7>
+ 2243494557U, // <4,0,7,u>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 1170546688U, // <4,0,u,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 96804966U, // <4,0,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685275293U, // <4,0,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2637883863U, // <4,0,u,3>: Cost 3 vsldoi4 <3,4,0,u>, <3,4,0,u>
+ 1170547026U, // <4,0,u,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2698590362U, // <4,0,u,5>: Cost 3 vsldoi8 <2,3,4,0>, RHS
+ 2244289014U, // <4,0,u,6>: Cost 3 vmrghw RHS, <0,6,1,7>
+ 2661774651U, // <4,0,u,7>: Cost 3 vsldoi4 <7,4,0,u>, <7,4,0,u>
+ 96805533U, // <4,0,u,u>: Cost 1 vmrghw RHS, LHS
+ 2667749478U, // <4,1,0,0>: Cost 3 vsldoi4 <u,4,1,0>, LHS
+ 2689966182U, // <4,1,0,1>: Cost 3 vsldoi8 <0,u,4,1>, LHS
+ 2238571418U, // <4,1,0,2>: Cost 3 vmrghw <4,0,1,2>, <1,2,3,4>
+ 3711633880U, // <4,1,0,3>: Cost 4 vsldoi4 <3,4,1,0>, <3,4,1,0>
+ 2689966418U, // <4,1,0,4>: Cost 3 vsldoi8 <0,u,4,1>, <0,4,1,5>
+ 3361046866U, // <4,1,0,5>: Cost 4 vmrglw <0,u,4,0>, <0,4,1,5>
+ 3741495802U, // <4,1,0,6>: Cost 4 vsldoi4 <u,4,1,0>, <6,2,7,3>
+ 3741496314U, // <4,1,0,7>: Cost 4 vsldoi4 <u,4,1,0>, <7,0,1,2>
+ 2689966765U, // <4,1,0,u>: Cost 3 vsldoi8 <0,u,4,1>, <0,u,4,1>
+ 3764372222U, // <4,1,1,0>: Cost 4 vsldoi8 <1,0,4,1>, <1,0,4,1>
+ 2758206263U, // <4,1,1,1>: Cost 3 vsldoi12 <1,1,1,4>, <1,1,1,4>
+ 2698593178U, // <4,1,1,2>: Cost 3 vsldoi8 <2,3,4,1>, <1,2,3,4>
+ 3361057810U, // <4,1,1,3>: Cost 4 vmrglw <0,u,4,1>, <4,2,1,3>
+ 3827303250U, // <4,1,1,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,1,4,4>
+ 2287313234U, // <4,1,1,5>: Cost 3 vmrglw <0,u,4,1>, <0,4,1,5>
+ 3763709171U, // <4,1,1,6>: Cost 4 vsldoi8 <0,u,4,1>, <1,6,5,7>
+ 3361058138U, // <4,1,1,7>: Cost 4 vmrglw <0,u,4,1>, <4,6,1,7>
+ 2239759744U, // <4,1,1,u>: Cost 3 vmrghw <4,1,u,3>, <1,u,3,4>
+ 2637906022U, // <4,1,2,0>: Cost 3 vsldoi4 <3,4,1,2>, LHS
+ 2637906842U, // <4,1,2,1>: Cost 3 vsldoi4 <3,4,1,2>, <1,2,3,4>
+ 3763709544U, // <4,1,2,2>: Cost 4 vsldoi8 <0,u,4,1>, <2,2,2,2>
+ 1685275546U, // <4,1,2,3>: Cost 2 vsldoi12 <1,2,3,4>, <1,2,3,4>
+ 2637909302U, // <4,1,2,4>: Cost 3 vsldoi4 <3,4,1,2>, RHS
+ 3361063250U, // <4,1,2,5>: Cost 4 vmrglw <0,u,4,2>, <0,4,1,5>
+ 3763709882U, // <4,1,2,6>: Cost 4 vsldoi8 <0,u,4,1>, <2,6,3,7>
+ 3735541054U, // <4,1,2,7>: Cost 4 vsldoi4 <7,4,1,2>, <7,4,1,2>
+ 1685644231U, // <4,1,2,u>: Cost 2 vsldoi12 <1,2,u,4>, <1,2,u,4>
+ 2702575792U, // <4,1,3,0>: Cost 3 vsldoi8 <3,0,4,1>, <3,0,4,1>
+ 3832759257U, // <4,1,3,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,3,1,4>
+ 3833349090U, // <4,1,3,2>: Cost 4 vsldoi12 <1,3,2,4>, <1,3,2,4>
+ 3763710364U, // <4,1,3,3>: Cost 4 vsldoi8 <0,u,4,1>, <3,3,3,3>
+ 2707884546U, // <4,1,3,4>: Cost 3 vsldoi8 <3,u,4,1>, <3,4,5,6>
+ 3361071442U, // <4,1,3,5>: Cost 4 vmrglw <0,u,4,3>, <0,4,1,5>
+ 3772336796U, // <4,1,3,6>: Cost 4 vsldoi8 <2,3,4,1>, <3,6,4,7>
+ 3775654595U, // <4,1,3,7>: Cost 5 vsldoi8 <2,u,4,1>, <3,7,0,1>
+ 2707884856U, // <4,1,3,u>: Cost 3 vsldoi8 <3,u,4,1>, <3,u,4,1>
+ 2667782246U, // <4,1,4,0>: Cost 3 vsldoi4 <u,4,1,4>, LHS
+ 2241463092U, // <4,1,4,1>: Cost 3 vmrghw <4,4,4,4>, <1,1,1,1>
+ 2241553306U, // <4,1,4,2>: Cost 3 vmrghw <4,4,5,6>, <1,2,3,4>
+ 3827303484U, // <4,1,4,3>: Cost 4 vsldoi12 <0,3,1,4>, <1,4,3,4>
+ 2667785424U, // <4,1,4,4>: Cost 3 vsldoi4 <u,4,1,4>, <4,4,4,4>
+ 2689969462U, // <4,1,4,5>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 3763711322U, // <4,1,4,6>: Cost 4 vsldoi8 <0,u,4,1>, <4,6,1,7>
+ 3867116636U, // <4,1,4,7>: Cost 4 vsldoi12 <7,0,1,4>, <1,4,7,0>
+ 2689969705U, // <4,1,4,u>: Cost 3 vsldoi8 <0,u,4,1>, RHS
+ 1546273106U, // <4,1,5,0>: Cost 2 vsldoi4 <0,4,1,5>, <0,4,1,5>
+ 1168556852U, // <4,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1168556950U, // <4,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2620016790U, // <4,1,5,3>: Cost 3 vsldoi4 <0,4,1,5>, <3,0,1,2>
+ 1546276150U, // <4,1,5,4>: Cost 2 vsldoi4 <0,4,1,5>, RHS
+ 2620018692U, // <4,1,5,5>: Cost 3 vsldoi4 <0,4,1,5>, <5,5,5,5>
+ 2242299087U, // <4,1,5,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667795450U, // <4,1,5,7>: Cost 3 vsldoi4 <u,4,1,5>, <7,0,1,2>
+ 1546278702U, // <4,1,5,u>: Cost 2 vsldoi4 <0,4,1,5>, LHS
+ 3781628193U, // <4,1,6,0>: Cost 4 vsldoi8 <3,u,4,1>, <6,0,1,2>
+ 3832759503U, // <4,1,6,1>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,1,7>
+ 3316261786U, // <4,1,6,2>: Cost 4 vmrghw <4,6,0,7>, <1,2,3,4>
+ 3781628466U, // <4,1,6,3>: Cost 4 vsldoi8 <3,u,4,1>, <6,3,4,5>
+ 3827303658U, // <4,1,6,4>: Cost 4 vsldoi12 <0,3,1,4>, <1,6,4,7>
+ 3361096018U, // <4,1,6,5>: Cost 4 vmrglw <0,u,4,6>, <0,4,1,5>
+ 3788264248U, // <4,1,6,6>: Cost 4 vsldoi8 <5,0,4,1>, <6,6,6,6>
+ 3788264270U, // <4,1,6,7>: Cost 4 vsldoi8 <5,0,4,1>, <6,7,0,1>
+ 3832759566U, // <4,1,6,u>: Cost 4 vsldoi12 <1,2,3,4>, <1,6,u,7>
+ 2726466580U, // <4,1,7,0>: Cost 3 vsldoi8 <7,0,4,1>, <7,0,4,1>
+ 3377692682U, // <4,1,7,1>: Cost 4 vmrglw <3,6,4,7>, <0,0,1,1>
+ 3377694870U, // <4,1,7,2>: Cost 4 vmrglw <3,6,4,7>, <3,0,1,2>
+ 3802199303U, // <4,1,7,3>: Cost 4 vsldoi8 <7,3,4,1>, <7,3,4,1>
+ 2731775334U, // <4,1,7,4>: Cost 3 vsldoi8 <7,u,4,1>, <7,4,5,6>
+ 3377693010U, // <4,1,7,5>: Cost 4 vmrglw <3,6,4,7>, <0,4,1,5>
+ 3365749804U, // <4,1,7,6>: Cost 5 vmrglw <1,6,4,7>, <1,4,1,6>
+ 3788265068U, // <4,1,7,7>: Cost 4 vsldoi8 <5,0,4,1>, <7,7,7,7>
+ 2731775644U, // <4,1,7,u>: Cost 3 vsldoi8 <7,u,4,1>, <7,u,4,1>
+ 1546297685U, // <4,1,u,0>: Cost 2 vsldoi4 <0,4,1,u>, <0,4,1,u>
+ 1170547508U, // <4,1,u,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1170547606U, // <4,1,u,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 1689257344U, // <4,1,u,3>: Cost 2 vsldoi12 <1,u,3,4>, <1,u,3,4>
+ 1546300726U, // <4,1,u,4>: Cost 2 vsldoi4 <0,4,1,u>, RHS
+ 2284716370U, // <4,1,u,5>: Cost 3 vmrglw <0,4,4,u>, <0,4,1,5>
+ 2244289743U, // <4,1,u,6>: Cost 3 vmrghw RHS, <1,6,1,7>
+ 2667820026U, // <4,1,u,7>: Cost 3 vsldoi4 <u,4,1,u>, <7,0,1,2>
+ 1546303278U, // <4,1,u,u>: Cost 2 vsldoi4 <0,4,1,u>, LHS
+ 3729621094U, // <4,2,0,0>: Cost 4 vsldoi4 <6,4,2,0>, LHS
+ 3763716198U, // <4,2,0,1>: Cost 4 vsldoi8 <0,u,4,2>, LHS
+ 2238858856U, // <4,2,0,2>: Cost 3 vmrghw <4,0,5,1>, <2,2,2,2>
+ 2295930982U, // <4,2,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3763716434U, // <4,2,0,4>: Cost 4 vsldoi8 <0,u,4,2>, <0,4,1,5>
+ 2238859107U, // <4,2,0,5>: Cost 3 vmrghw <4,0,5,1>, <2,5,3,1>
+ 2238859194U, // <4,2,0,6>: Cost 3 vmrghw <4,0,5,1>, <2,6,3,7>
+ 3312601066U, // <4,2,0,7>: Cost 4 vmrghw <4,0,5,1>, <2,7,0,1>
+ 2295930987U, // <4,2,0,u>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 3699769446U, // <4,2,1,0>: Cost 4 vsldoi4 <1,4,2,1>, LHS
+ 3313255971U, // <4,2,1,1>: Cost 4 vmrghw <4,1,5,0>, <2,1,3,5>
+ 3361056360U, // <4,2,1,2>: Cost 4 vmrglw <0,u,4,1>, <2,2,2,2>
+ 2287312998U, // <4,2,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3788932148U, // <4,2,1,4>: Cost 4 vsldoi8 <5,1,4,2>, <1,4,2,5>
+ 3313256290U, // <4,2,1,5>: Cost 4 vmrghw <4,1,5,0>, <2,5,3,0>
+ 3838289469U, // <4,2,1,6>: Cost 4 vsldoi12 <2,1,6,4>, <2,1,6,4>
+ 3369682865U, // <4,2,1,7>: Cost 5 vmrglw <2,3,4,1>, <2,6,2,7>
+ 2287313003U, // <4,2,1,u>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 3838658133U, // <4,2,2,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,0,1>
+ 3711722394U, // <4,2,2,1>: Cost 4 vsldoi4 <3,4,2,2>, <1,2,3,4>
+ 2759018088U, // <4,2,2,2>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,2,2>
+ 2759018098U, // <4,2,2,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,3,3>
+ 3838658168U, // <4,2,2,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,2,4,0>
+ 3369027341U, // <4,2,2,5>: Cost 4 vmrglw <2,2,4,2>, <2,4,2,5>
+ 2240227258U, // <4,2,2,6>: Cost 3 vmrghw <4,2,5,6>, <2,6,3,7>
+ 3735614791U, // <4,2,2,7>: Cost 4 vsldoi4 <7,4,2,2>, <7,4,2,2>
+ 2759018143U, // <4,2,2,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,2,u,3>
+ 2759018150U, // <4,2,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,0,1>
+ 3831948975U, // <4,2,3,1>: Cost 4 vsldoi12 <1,1,1,4>, <2,3,1,1>
+ 3832759993U, // <4,2,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <2,3,2,2>
+ 2759018180U, // <4,2,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,3,4>
+ 2759018185U, // <4,2,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,4,0>
+ 3839542998U, // <4,2,3,5>: Cost 4 vsldoi12 <2,3,5,4>, <2,3,5,4>
+ 3314640826U, // <4,2,3,6>: Cost 4 vmrghw <4,3,5,7>, <2,6,3,7>
+ 2765948648U, // <4,2,3,7>: Cost 3 vsldoi12 <2,3,7,4>, <2,3,7,4>
+ 2759018222U, // <4,2,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,3,u,1>
+ 3838658295U, // <4,2,4,0>: Cost 4 vsldoi12 <2,2,2,4>, <2,4,0,1>
+ 3315205667U, // <4,2,4,1>: Cost 4 vmrghw <4,4,4,4>, <2,1,3,5>
+ 2241463912U, // <4,2,4,2>: Cost 3 vmrghw <4,4,4,4>, <2,2,2,2>
+ 1234829414U, // <4,2,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2241464085U, // <4,2,4,4>: Cost 3 vmrghw <4,4,4,4>, <2,4,3,4>
+ 2241546087U, // <4,2,4,5>: Cost 3 vmrghw <4,4,5,5>, <2,5,3,5>
+ 2241464250U, // <4,2,4,6>: Cost 3 vmrghw <4,4,4,4>, <2,6,3,7>
+ 3741602873U, // <4,2,4,7>: Cost 4 vsldoi4 <u,4,2,4>, <7,0,u,2>
+ 1234829419U, // <4,2,4,u>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 2626060390U, // <4,2,5,0>: Cost 3 vsldoi4 <1,4,2,5>, LHS
+ 2626061364U, // <4,2,5,1>: Cost 3 vsldoi4 <1,4,2,5>, <1,4,2,5>
+ 1168557672U, // <4,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222230118U, // <4,2,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 2626063670U, // <4,2,5,4>: Cost 3 vsldoi4 <1,4,2,5>, RHS
+ 2242299752U, // <4,2,5,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1168558010U, // <4,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2242299882U, // <4,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1222230123U, // <4,2,5,u>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 3711754342U, // <4,2,6,0>: Cost 4 vsldoi4 <3,4,2,6>, LHS
+ 3711755162U, // <4,2,6,1>: Cost 4 vsldoi4 <3,4,2,6>, <1,2,3,4>
+ 3838658481U, // <4,2,6,2>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,2,7>
+ 2759018426U, // <4,2,6,3>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,3,7>
+ 3838658499U, // <4,2,6,4>: Cost 4 vsldoi12 <2,2,2,4>, <2,6,4,7>
+ 3735646310U, // <4,2,6,5>: Cost 4 vsldoi4 <7,4,2,6>, <5,6,7,4>
+ 3316590522U, // <4,2,6,6>: Cost 4 vmrghw <4,6,5,2>, <2,6,3,7>
+ 3798889331U, // <4,2,6,7>: Cost 4 vsldoi8 <6,7,4,2>, <6,7,4,2>
+ 2759018471U, // <4,2,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <2,6,u,7>
+ 3874564074U, // <4,2,7,0>: Cost 4 vsldoi12 <u,2,3,4>, <2,7,0,1>
+ 3800880230U, // <4,2,7,1>: Cost 4 vsldoi8 <7,1,4,2>, <7,1,4,2>
+ 3371722344U, // <4,2,7,2>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,2>
+ 2303950950U, // <4,2,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 3371722346U, // <4,2,7,4>: Cost 4 vmrglw <2,6,4,7>, <2,2,2,4>
+ 3371722509U, // <4,2,7,5>: Cost 5 vmrglw <2,6,4,7>, <2,4,2,5>
+ 3317237690U, // <4,2,7,6>: Cost 4 vmrghw <4,7,5,0>, <2,6,3,7>
+ 3317237738U, // <4,2,7,7>: Cost 4 vmrghw <4,7,5,0>, <2,7,0,1>
+ 2303950955U, // <4,2,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2759018555U, // <4,2,u,0>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,0,1>
+ 2626085943U, // <4,2,u,1>: Cost 3 vsldoi4 <1,4,2,u>, <1,4,2,u>
+ 1170548328U, // <4,2,u,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1222254694U, // <4,2,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2759018595U, // <4,2,u,4>: Cost 3 vsldoi12 <1,2,3,4>, <2,u,4,5>
+ 2244290408U, // <4,2,u,5>: Cost 3 vmrghw RHS, <2,5,3,6>
+ 1170548666U, // <4,2,u,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2769266813U, // <4,2,u,7>: Cost 3 vsldoi12 <2,u,7,4>, <2,u,7,4>
+ 1222254699U, // <4,2,u,u>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 2238859414U, // <4,3,0,0>: Cost 3 vmrghw <4,0,5,1>, <3,0,1,2>
+ 2759018646U, // <4,3,0,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,1,2>
+ 3312314708U, // <4,3,0,2>: Cost 4 vmrghw <4,0,1,2>, <3,2,4,3>
+ 2238859676U, // <4,3,0,3>: Cost 3 vmrghw <4,0,5,1>, <3,3,3,3>
+ 2295931802U, // <4,3,0,4>: Cost 3 vmrglw <2,3,4,0>, <1,2,3,4>
+ 3735670886U, // <4,3,0,5>: Cost 4 vsldoi4 <7,4,3,0>, <5,6,7,4>
+ 3312315036U, // <4,3,0,6>: Cost 4 vmrghw <4,0,1,2>, <3,6,4,7>
+ 3369674682U, // <4,3,0,7>: Cost 4 vmrglw <2,3,4,0>, <2,6,3,7>
+ 2759018709U, // <4,3,0,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,0,u,2>
+ 3361055638U, // <4,3,1,0>: Cost 4 vmrglw <0,u,4,1>, <1,2,3,0>
+ 3831949542U, // <4,3,1,1>: Cost 4 vsldoi12 <1,1,1,4>, <3,1,1,1>
+ 2703917978U, // <4,3,1,2>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3361056370U, // <4,3,1,3>: Cost 4 vmrglw <0,u,4,1>, <2,2,3,3>
+ 2295939994U, // <4,3,1,4>: Cost 3 vmrglw <2,3,4,1>, <1,2,3,4>
+ 3361056291U, // <4,3,1,5>: Cost 4 vmrglw <0,u,4,1>, <2,1,3,5>
+ 3378972520U, // <4,3,1,6>: Cost 4 vmrglw <3,u,4,1>, <2,5,3,6>
+ 3361056698U, // <4,3,1,7>: Cost 4 vmrglw <0,u,4,1>, <2,6,3,7>
+ 2703917978U, // <4,3,1,u>: Cost 3 vsldoi8 <3,2,4,3>, <1,2,3,4>
+ 3832760624U, // <4,3,2,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,0,3>
+ 3711796122U, // <4,3,2,1>: Cost 4 vsldoi4 <3,4,3,2>, <1,2,3,4>
+ 3832760641U, // <4,3,2,2>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,2,2>
+ 2770962764U, // <4,3,2,3>: Cost 3 vsldoi12 <3,2,3,4>, <3,2,3,4>
+ 2759018836U, // <4,3,2,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,2,4,3>
+ 3827304802U, // <4,3,2,5>: Cost 5 vsldoi12 <0,3,1,4>, <3,2,5,u>
+ 3832760678U, // <4,3,2,6>: Cost 4 vsldoi12 <1,2,3,4>, <3,2,6,3>
+ 3859597679U, // <4,3,2,7>: Cost 4 vsldoi12 <5,6,7,4>, <3,2,7,3>
+ 2771331449U, // <4,3,2,u>: Cost 3 vsldoi12 <3,2,u,4>, <3,2,u,4>
+ 2240841878U, // <4,3,3,0>: Cost 3 vmrghw <4,3,5,0>, <3,0,1,2>
+ 3776997635U, // <4,3,3,1>: Cost 4 vsldoi8 <3,1,4,3>, <3,1,4,3>
+ 2703919444U, // <4,3,3,2>: Cost 3 vsldoi8 <3,2,4,3>, <3,2,4,3>
+ 2759018908U, // <4,3,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,3,3>
+ 2759018918U, // <4,3,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,3,4,4>
+ 3386951446U, // <4,3,3,5>: Cost 4 vmrglw <5,2,4,3>, <2,4,3,5>
+ 3777661596U, // <4,3,3,6>: Cost 4 vsldoi8 <3,2,4,3>, <3,6,4,7>
+ 3375007674U, // <4,3,3,7>: Cost 4 vmrglw <3,2,4,3>, <2,6,3,7>
+ 2707901242U, // <4,3,3,u>: Cost 3 vsldoi8 <3,u,4,3>, <3,u,4,3>
+ 2759018960U, // <4,3,4,0>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,0,1>
+ 2759018970U, // <4,3,4,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,1,2>
+ 2632099605U, // <4,3,4,2>: Cost 3 vsldoi4 <2,4,3,4>, <2,4,3,4>
+ 2241464732U, // <4,3,4,3>: Cost 3 vmrghw <4,4,4,4>, <3,3,3,3>
+ 2759019000U, // <4,3,4,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,4,5>
+ 2753563138U, // <4,3,4,5>: Cost 3 vsldoi12 <0,3,1,4>, <3,4,5,6>
+ 3777662316U, // <4,3,4,6>: Cost 4 vsldoi8 <3,2,4,3>, <4,6,3,7>
+ 2308573114U, // <4,3,4,7>: Cost 3 vmrglw <4,4,4,4>, <2,6,3,7>
+ 2759019032U, // <4,3,4,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,4,u,1>
+ 1168558230U, // <4,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2242300134U, // <4,3,5,1>: Cost 3 vmrghw RHS, <3,1,1,1>
+ 2632107798U, // <4,3,5,2>: Cost 3 vsldoi4 <2,4,3,5>, <2,4,3,5>
+ 1168558492U, // <4,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1168558594U, // <4,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2295973654U, // <4,3,5,5>: Cost 3 vmrglw <2,3,4,5>, <2,4,3,5>
+ 2242300536U, // <4,3,5,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295973818U, // <4,3,5,7>: Cost 3 vmrglw <2,3,4,5>, <2,6,3,7>
+ 1168558878U, // <4,3,5,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 3832760952U, // <4,3,6,0>: Cost 4 vsldoi12 <1,2,3,4>, <3,6,0,7>
+ 3711828890U, // <4,3,6,1>: Cost 4 vsldoi4 <3,4,3,6>, <1,2,3,4>
+ 3316484436U, // <4,3,6,2>: Cost 4 vmrghw <4,6,3,7>, <3,2,4,3>
+ 3711830512U, // <4,3,6,3>: Cost 4 vsldoi4 <3,4,3,6>, <3,4,3,6>
+ 2759019164U, // <4,3,6,4>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3361097251U, // <4,3,6,5>: Cost 5 vmrglw <0,u,4,6>, <2,1,3,5>
+ 3316624045U, // <4,3,6,6>: Cost 4 vmrghw <4,6,5,6>, <3,6,6,6>
+ 2773912244U, // <4,3,6,7>: Cost 3 vsldoi12 <3,6,7,4>, <3,6,7,4>
+ 2759019164U, // <4,3,6,u>: Cost 3 vsldoi12 <1,2,3,4>, <3,6,4,7>
+ 3377693590U, // <4,3,7,0>: Cost 4 vmrglw <3,6,4,7>, <1,2,3,0>
+ 3365751680U, // <4,3,7,1>: Cost 5 vmrglw <1,6,4,7>, <4,0,3,1>
+ 2727810232U, // <4,3,7,2>: Cost 3 vsldoi8 <7,2,4,3>, <7,2,4,3>
+ 3377694322U, // <4,3,7,3>: Cost 4 vmrglw <3,6,4,7>, <2,2,3,3>
+ 2303951770U, // <4,3,7,4>: Cost 3 vmrglw <3,6,4,7>, <1,2,3,4>
+ 3741700198U, // <4,3,7,5>: Cost 4 vsldoi4 <u,4,3,7>, <5,6,7,4>
+ 3377695216U, // <4,3,7,6>: Cost 4 vmrglw <3,6,4,7>, <3,4,3,6>
+ 3375703994U, // <4,3,7,7>: Cost 4 vmrglw <3,3,4,7>, <2,6,3,7>
+ 2731792030U, // <4,3,7,u>: Cost 3 vsldoi8 <7,u,4,3>, <7,u,4,3>
+ 1170548886U, // <4,3,u,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2759019294U, // <4,3,u,1>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,1,2>
+ 2632132377U, // <4,3,u,2>: Cost 3 vsldoi4 <2,4,3,u>, <2,4,3,u>
+ 1170549148U, // <4,3,u,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1170549250U, // <4,3,u,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 2759019334U, // <4,3,u,5>: Cost 3 vsldoi12 <1,2,3,4>, <3,u,5,6>
+ 2244291192U, // <4,3,u,6>: Cost 3 vmrghw RHS, <3,6,0,7>
+ 2295998394U, // <4,3,u,7>: Cost 3 vmrglw <2,3,4,u>, <2,6,3,7>
+ 1170549534U, // <4,3,u,u>: Cost 2 vmrghw RHS, <3,u,1,2>
+ 1165118354U, // <4,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1637482598U, // <4,4,0,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 3711854285U, // <4,4,0,2>: Cost 4 vsldoi4 <3,4,4,0>, <2,3,4,4>
+ 3827305344U, // <4,4,0,3>: Cost 4 vsldoi12 <0,3,1,4>, <4,0,3,1>
+ 2711224658U, // <4,4,0,4>: Cost 3 vsldoi8 <4,4,4,4>, <0,4,1,5>
+ 1165118774U, // <4,4,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3312602489U, // <4,4,0,6>: Cost 4 vmrghw <4,0,5,1>, <4,6,5,2>
+ 3369675420U, // <4,4,0,7>: Cost 4 vmrglw <2,3,4,0>, <3,6,4,7>
+ 1165119017U, // <4,4,0,u>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 3369682633U, // <4,4,1,0>: Cost 4 vmrglw <2,3,4,1>, <2,3,4,0>
+ 2287313581U, // <4,4,1,1>: Cost 3 vmrglw <0,u,4,1>, <0,u,4,1>
+ 2759019466U, // <4,4,1,2>: Cost 3 vsldoi12 <1,2,3,4>, <4,1,2,3>
+ 3369683284U, // <4,4,1,3>: Cost 4 vmrglw <2,3,4,1>, <3,2,4,3>
+ 2311204048U, // <4,4,1,4>: Cost 3 vmrglw <4,u,4,1>, <4,4,4,4>
+ 2239319350U, // <4,4,1,5>: Cost 3 vmrghw <4,1,2,3>, RHS
+ 3784967411U, // <4,4,1,6>: Cost 4 vsldoi8 <4,4,4,4>, <1,6,5,7>
+ 3369683612U, // <4,4,1,7>: Cost 4 vmrglw <2,3,4,1>, <3,6,4,7>
+ 2763000832U, // <4,4,1,u>: Cost 3 vsldoi12 <1,u,3,4>, <4,1,u,3>
+ 3711869030U, // <4,4,2,0>: Cost 4 vsldoi4 <3,4,4,2>, LHS
+ 3711869850U, // <4,4,2,1>: Cost 4 vsldoi4 <3,4,4,2>, <1,2,3,4>
+ 2240203830U, // <4,4,2,2>: Cost 3 vmrghw <4,2,5,3>, <4,2,5,3>
+ 2698618573U, // <4,4,2,3>: Cost 3 vsldoi8 <2,3,4,4>, <2,3,4,4>
+ 2711226133U, // <4,4,2,4>: Cost 3 vsldoi8 <4,4,4,4>, <2,4,3,4>
+ 2240204086U, // <4,4,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2711226298U, // <4,4,2,6>: Cost 3 vsldoi8 <4,4,4,4>, <2,6,3,7>
+ 3832761416U, // <4,4,2,7>: Cost 4 vsldoi12 <1,2,3,4>, <4,2,7,3>
+ 2701936738U, // <4,4,2,u>: Cost 3 vsldoi8 <2,u,4,4>, <2,u,4,4>
+ 2711226518U, // <4,4,3,0>: Cost 3 vsldoi8 <4,4,4,4>, <3,0,1,2>
+ 3777005828U, // <4,4,3,1>: Cost 4 vsldoi8 <3,1,4,4>, <3,1,4,4>
+ 3832761453U, // <4,4,3,2>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,2,4>
+ 2301266260U, // <4,4,3,3>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 2705254903U, // <4,4,3,4>: Cost 3 vsldoi8 <3,4,4,4>, <3,4,4,4>
+ 2240843062U, // <4,4,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 3832761489U, // <4,4,3,6>: Cost 4 vsldoi12 <1,2,3,4>, <4,3,6,4>
+ 3375008412U, // <4,4,3,7>: Cost 4 vmrglw <3,2,4,3>, <3,6,4,7>
+ 2301266260U, // <4,4,3,u>: Cost 3 vmrglw <3,2,4,3>, <3,2,4,3>
+ 1570373734U, // <4,4,4,0>: Cost 2 vsldoi4 <4,4,4,4>, LHS
+ 2308574089U, // <4,4,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,4,1>
+ 2644117096U, // <4,4,4,2>: Cost 3 vsldoi4 <4,4,4,4>, <2,2,2,2>
+ 2638146039U, // <4,4,4,3>: Cost 3 vsldoi4 <3,4,4,4>, <3,4,4,4>
+ 229035318U, // <4,4,4,4>: Cost 1 vspltisw0 RHS
+ 1167723830U, // <4,4,4,5>: Cost 2 vmrghw <4,4,4,4>, RHS
+ 2644120058U, // <4,4,4,6>: Cost 3 vsldoi4 <4,4,4,4>, <6,2,7,3>
+ 2662036827U, // <4,4,4,7>: Cost 3 vsldoi4 <7,4,4,4>, <7,4,4,4>
+ 229035318U, // <4,4,4,u>: Cost 1 vspltisw0 RHS
+ 1168558994U, // <4,4,5,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 2638152602U, // <4,4,5,1>: Cost 3 vsldoi4 <3,4,4,5>, <1,2,3,4>
+ 2242300981U, // <4,4,5,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638154232U, // <4,4,5,3>: Cost 3 vsldoi4 <3,4,4,5>, <3,4,4,5>
+ 1168559322U, // <4,4,5,4>: Cost 2 vmrghw RHS, <4,4,5,5>
+ 94817590U, // <4,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685278006U, // <4,4,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2242309576U, // <4,4,5,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 94817833U, // <4,4,5,u>: Cost 1 vmrghw RHS, RHS
+ 3316591506U, // <4,4,6,0>: Cost 4 vmrghw <4,6,5,2>, <4,0,5,1>
+ 3758428587U, // <4,4,6,1>: Cost 4 vsldoi8 <0,0,4,4>, <6,1,7,5>
+ 2711228922U, // <4,4,6,2>: Cost 3 vsldoi8 <4,4,4,4>, <6,2,7,3>
+ 3796251185U, // <4,4,6,3>: Cost 4 vsldoi8 <6,3,4,4>, <6,3,4,4>
+ 2711229085U, // <4,4,6,4>: Cost 3 vsldoi8 <4,4,4,4>, <6,4,7,4>
+ 2242850102U, // <4,4,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2242850169U, // <4,4,6,6>: Cost 3 vmrghw <4,6,5,2>, <4,6,5,2>
+ 2725163893U, // <4,4,6,7>: Cost 3 vsldoi8 <6,7,4,4>, <6,7,4,4>
+ 2242850345U, // <4,4,6,u>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2711229434U, // <4,4,7,0>: Cost 3 vsldoi8 <4,4,4,4>, <7,0,1,2>
+ 3377694410U, // <4,4,7,1>: Cost 4 vmrglw <3,6,4,7>, <2,3,4,1>
+ 3868593584U, // <4,4,7,2>: Cost 4 vsldoi12 <7,2,3,4>, <4,7,2,3>
+ 3377695060U, // <4,4,7,3>: Cost 4 vmrglw <3,6,4,7>, <3,2,4,3>
+ 2729145691U, // <4,4,7,4>: Cost 3 vsldoi8 <7,4,4,4>, <7,4,4,4>
+ 2243497270U, // <4,4,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 3871542744U, // <4,4,7,6>: Cost 4 vsldoi12 <7,6,7,4>, <4,7,6,7>
+ 2303953564U, // <4,4,7,7>: Cost 3 vmrglw <3,6,4,7>, <3,6,4,7>
+ 2243497513U, // <4,4,7,u>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 1170549650U, // <4,4,u,0>: Cost 2 vmrghw RHS, <4,0,5,1>
+ 1637488430U, // <4,4,u,1>: Cost 2 vsldoi8 <4,4,4,4>, LHS
+ 2244291637U, // <4,4,u,2>: Cost 3 vmrghw RHS, <4,2,5,2>
+ 2638178811U, // <4,4,u,3>: Cost 3 vsldoi4 <3,4,4,u>, <3,4,4,u>
+ 229035318U, // <4,4,u,4>: Cost 1 vspltisw0 RHS
+ 96808246U, // <4,4,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685278249U, // <4,4,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 2244292040U, // <4,4,u,7>: Cost 3 vmrghw RHS, <4,7,5,0>
+ 96808489U, // <4,4,u,u>: Cost 1 vmrghw RHS, RHS
+ 2698625024U, // <4,5,0,0>: Cost 3 vsldoi8 <2,3,4,5>, <0,0,0,0>
+ 1624883302U, // <4,5,0,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2638186190U, // <4,5,0,2>: Cost 3 vsldoi4 <3,4,5,0>, <2,3,4,5>
+ 2638187004U, // <4,5,0,3>: Cost 3 vsldoi4 <3,4,5,0>, <3,4,5,0>
+ 2687345005U, // <4,5,0,4>: Cost 3 vsldoi8 <0,4,4,5>, <0,4,4,5>
+ 2238861316U, // <4,5,0,5>: Cost 3 vmrghw <4,0,5,1>, <5,5,5,5>
+ 2662077302U, // <4,5,0,6>: Cost 3 vsldoi4 <7,4,5,0>, <6,7,4,5>
+ 2662077792U, // <4,5,0,7>: Cost 3 vsldoi4 <7,4,5,0>, <7,4,5,0>
+ 1624883869U, // <4,5,0,u>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 3361057762U, // <4,5,1,0>: Cost 4 vmrglw <0,u,4,1>, <4,1,5,0>
+ 2691326803U, // <4,5,1,1>: Cost 3 vsldoi8 <1,1,4,5>, <1,1,4,5>
+ 2698625942U, // <4,5,1,2>: Cost 3 vsldoi8 <2,3,4,5>, <1,2,3,0>
+ 3361055659U, // <4,5,1,3>: Cost 4 vmrglw <0,u,4,1>, <1,2,5,3>
+ 3761087567U, // <4,5,1,4>: Cost 4 vsldoi8 <0,4,4,5>, <1,4,5,5>
+ 2693981335U, // <4,5,1,5>: Cost 3 vsldoi8 <1,5,4,5>, <1,5,4,5>
+ 2305231362U, // <4,5,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 3361055987U, // <4,5,1,7>: Cost 4 vmrglw <0,u,4,1>, <1,6,5,7>
+ 2695972234U, // <4,5,1,u>: Cost 3 vsldoi8 <1,u,4,5>, <1,u,4,5>
+ 2638200934U, // <4,5,2,0>: Cost 3 vsldoi4 <3,4,5,2>, LHS
+ 3761088035U, // <4,5,2,1>: Cost 4 vsldoi8 <0,4,4,5>, <2,1,3,5>
+ 2697963133U, // <4,5,2,2>: Cost 3 vsldoi8 <2,2,4,5>, <2,2,4,5>
+ 1624884942U, // <4,5,2,3>: Cost 2 vsldoi8 <2,3,4,5>, <2,3,4,5>
+ 2698626838U, // <4,5,2,4>: Cost 3 vsldoi8 <2,3,4,5>, <2,4,3,5>
+ 3772368744U, // <4,5,2,5>: Cost 4 vsldoi8 <2,3,4,5>, <2,5,3,6>
+ 2698627002U, // <4,5,2,6>: Cost 3 vsldoi8 <2,3,4,5>, <2,6,3,7>
+ 3775023122U, // <4,5,2,7>: Cost 4 vsldoi8 <2,7,4,5>, <2,7,4,5>
+ 1628203107U, // <4,5,2,u>: Cost 2 vsldoi8 <2,u,4,5>, <2,u,4,5>
+ 2698627222U, // <4,5,3,0>: Cost 3 vsldoi8 <2,3,4,5>, <3,0,1,2>
+ 3765070057U, // <4,5,3,1>: Cost 4 vsldoi8 <1,1,4,5>, <3,1,1,4>
+ 2698627404U, // <4,5,3,2>: Cost 3 vsldoi8 <2,3,4,5>, <3,2,3,4>
+ 2698627484U, // <4,5,3,3>: Cost 3 vsldoi8 <2,3,4,5>, <3,3,3,3>
+ 2698627580U, // <4,5,3,4>: Cost 3 vsldoi8 <2,3,4,5>, <3,4,5,0>
+ 3779668553U, // <4,5,3,5>: Cost 4 vsldoi8 <3,5,4,5>, <3,5,4,5>
+ 2725169844U, // <4,5,3,6>: Cost 3 vsldoi8 <6,7,4,5>, <3,6,7,4>
+ 2707253995U, // <4,5,3,7>: Cost 3 vsldoi8 <3,7,4,5>, <3,7,4,5>
+ 2698627870U, // <4,5,3,u>: Cost 3 vsldoi8 <2,3,4,5>, <3,u,1,2>
+ 2638217318U, // <4,5,4,0>: Cost 3 vsldoi4 <3,4,5,4>, LHS
+ 2308574098U, // <4,5,4,1>: Cost 3 vmrglw <4,4,4,4>, <4,0,5,1>
+ 2698628150U, // <4,5,4,2>: Cost 3 vsldoi8 <2,3,4,5>, <4,2,5,3>
+ 2638219776U, // <4,5,4,3>: Cost 3 vsldoi4 <3,4,5,4>, <3,4,5,4>
+ 2698628314U, // <4,5,4,4>: Cost 3 vsldoi8 <2,3,4,5>, <4,4,5,5>
+ 1624886582U, // <4,5,4,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 2698628478U, // <4,5,4,6>: Cost 3 vsldoi8 <2,3,4,5>, <4,6,5,7>
+ 2662110564U, // <4,5,4,7>: Cost 3 vsldoi4 <7,4,5,4>, <7,4,5,4>
+ 1624886825U, // <4,5,4,u>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1570455654U, // <4,5,5,0>: Cost 2 vsldoi4 <4,4,5,5>, LHS
+ 2312564250U, // <4,5,5,1>: Cost 3 vmrglw <5,1,4,5>, <4,u,5,1>
+ 2644199118U, // <4,5,5,2>: Cost 3 vsldoi4 <4,4,5,5>, <2,3,4,5>
+ 2295974966U, // <4,5,5,3>: Cost 3 vmrglw <2,3,4,5>, <4,2,5,3>
+ 1570458842U, // <4,5,5,4>: Cost 2 vsldoi4 <4,4,5,5>, <4,4,5,5>
+ 1168568324U, // <4,5,5,5>: Cost 2 vmrghw RHS, <5,5,5,5>
+ 1168568418U, // <4,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2295975294U, // <4,5,5,7>: Cost 3 vmrglw <2,3,4,5>, <4,6,5,7>
+ 1168716036U, // <4,5,5,u>: Cost 2 vmrghw RHS, <5,u,7,0>
+ 1564491878U, // <4,5,6,0>: Cost 2 vsldoi4 <3,4,5,6>, LHS
+ 2626290768U, // <4,5,6,1>: Cost 3 vsldoi4 <1,4,5,6>, <1,4,5,6>
+ 2632263465U, // <4,5,6,2>: Cost 3 vsldoi4 <2,4,5,6>, <2,4,5,6>
+ 1564494338U, // <4,5,6,3>: Cost 2 vsldoi4 <3,4,5,6>, <3,4,5,6>
+ 1564495158U, // <4,5,6,4>: Cost 2 vsldoi4 <3,4,5,6>, RHS
+ 2638237464U, // <4,5,6,5>: Cost 3 vsldoi4 <3,4,5,6>, <5,2,6,3>
+ 2656154253U, // <4,5,6,6>: Cost 3 vsldoi4 <6,4,5,6>, <6,4,5,6>
+ 27705344U, // <4,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,6,u>: Cost 0 copy RHS
+ 2725172218U, // <4,5,7,0>: Cost 3 vsldoi8 <6,7,4,5>, <7,0,1,2>
+ 3859599489U, // <4,5,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <5,7,1,4>
+ 2698630320U, // <4,5,7,2>: Cost 3 vsldoi8 <2,3,4,5>, <7,2,3,4>
+ 2728490251U, // <4,5,7,3>: Cost 3 vsldoi8 <7,3,4,5>, <7,3,4,5>
+ 2725172576U, // <4,5,7,4>: Cost 3 vsldoi8 <6,7,4,5>, <7,4,5,0>
+ 3317239812U, // <4,5,7,5>: Cost 4 vmrghw <4,7,5,0>, <5,5,5,5>
+ 2725172760U, // <4,5,7,6>: Cost 3 vsldoi8 <6,7,4,5>, <7,6,7,4>
+ 2725172844U, // <4,5,7,7>: Cost 3 vsldoi8 <6,7,4,5>, <7,7,7,7>
+ 2725172866U, // <4,5,7,u>: Cost 3 vsldoi8 <6,7,4,5>, <7,u,1,2>
+ 1564508262U, // <4,5,u,0>: Cost 2 vsldoi4 <3,4,5,u>, LHS
+ 1624889134U, // <4,5,u,1>: Cost 2 vsldoi8 <2,3,4,5>, LHS
+ 2698631045U, // <4,5,u,2>: Cost 3 vsldoi8 <2,3,4,5>, <u,2,3,0>
+ 1564510724U, // <4,5,u,3>: Cost 2 vsldoi4 <3,4,5,u>, <3,4,5,u>
+ 1564511542U, // <4,5,u,4>: Cost 2 vsldoi4 <3,4,5,u>, RHS
+ 1624889498U, // <4,5,u,5>: Cost 2 vsldoi8 <2,3,4,5>, RHS
+ 1170550882U, // <4,5,u,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 27705344U, // <4,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,5,u,u>: Cost 0 copy RHS
+ 3312595285U, // <4,6,0,0>: Cost 4 vmrghw <4,0,5,0>, <6,0,7,0>
+ 3763748966U, // <4,6,0,1>: Cost 4 vsldoi8 <0,u,4,6>, LHS
+ 2238861818U, // <4,6,0,2>: Cost 3 vmrghw <4,0,5,1>, <6,2,7,3>
+ 3767730432U, // <4,6,0,3>: Cost 4 vsldoi8 <1,5,4,6>, <0,3,1,4>
+ 3763749202U, // <4,6,0,4>: Cost 4 vsldoi8 <0,u,4,6>, <0,4,1,5>
+ 2238862059U, // <4,6,0,5>: Cost 3 vmrghw <4,0,5,1>, <6,5,7,1>
+ 2238862136U, // <4,6,0,6>: Cost 3 vmrghw <4,0,5,1>, <6,6,6,6>
+ 2295934262U, // <4,6,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 2295934263U, // <4,6,0,u>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 3378973999U, // <4,6,1,0>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,0>
+ 3378974648U, // <4,6,1,1>: Cost 4 vmrglw <3,u,4,1>, <5,4,6,1>
+ 3779675034U, // <4,6,1,2>: Cost 4 vsldoi8 <3,5,4,6>, <1,2,3,4>
+ 3378974002U, // <4,6,1,3>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,3>
+ 3378974003U, // <4,6,1,4>: Cost 4 vmrglw <3,u,4,1>, <4,5,6,4>
+ 3767731352U, // <4,6,1,5>: Cost 4 vsldoi8 <1,5,4,6>, <1,5,4,6>
+ 3378974734U, // <4,6,1,6>: Cost 4 vmrglw <3,u,4,1>, <5,5,6,6>
+ 2287316278U, // <4,6,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 2287316279U, // <4,6,1,u>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 3735904358U, // <4,6,2,0>: Cost 4 vsldoi4 <7,4,6,2>, LHS
+ 3763750435U, // <4,6,2,1>: Cost 5 vsldoi8 <0,u,4,6>, <2,1,3,5>
+ 3313938937U, // <4,6,2,2>: Cost 4 vmrghw <4,2,5,2>, <6,2,7,2>
+ 3772376782U, // <4,6,2,3>: Cost 4 vsldoi8 <2,3,4,6>, <2,3,4,5>
+ 3852890591U, // <4,6,2,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,2,4,3>
+ 3735908454U, // <4,6,2,5>: Cost 4 vsldoi4 <7,4,6,2>, <5,6,7,4>
+ 3801573306U, // <4,6,2,6>: Cost 4 vsldoi8 <7,2,4,6>, <2,6,3,7>
+ 2785858042U, // <4,6,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,7,3>
+ 2785858051U, // <4,6,2,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,2,u,3>
+ 3863065101U, // <4,6,3,0>: Cost 4 vsldoi12 <6,3,0,4>, <6,3,0,4>
+ 3314586024U, // <4,6,3,1>: Cost 4 vmrghw <4,3,5,0>, <6,1,7,2>
+ 3863212575U, // <4,6,3,2>: Cost 4 vsldoi12 <6,3,2,4>, <6,3,2,4>
+ 3863286312U, // <4,6,3,3>: Cost 4 vsldoi12 <6,3,3,4>, <6,3,3,4>
+ 3767732738U, // <4,6,3,4>: Cost 4 vsldoi8 <1,5,4,6>, <3,4,5,6>
+ 3779676746U, // <4,6,3,5>: Cost 4 vsldoi8 <3,5,4,6>, <3,5,4,6>
+ 3398898488U, // <4,6,3,6>: Cost 4 vmrglw <7,2,4,3>, <6,6,6,6>
+ 2301267254U, // <4,6,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2301267255U, // <4,6,3,u>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 3852890715U, // <4,6,4,0>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,0,1>
+ 3315208615U, // <4,6,4,1>: Cost 4 vmrghw <4,4,4,4>, <6,1,7,1>
+ 2241466874U, // <4,6,4,2>: Cost 3 vmrghw <4,4,4,4>, <6,2,7,3>
+ 3852890745U, // <4,6,4,3>: Cost 4 vsldoi12 <4,5,6,4>, <6,4,3,4>
+ 2241467037U, // <4,6,4,4>: Cost 3 vmrghw <4,4,4,4>, <6,4,7,4>
+ 2241549039U, // <4,6,4,5>: Cost 3 vmrghw <4,4,5,5>, <6,5,7,5>
+ 2241467192U, // <4,6,4,6>: Cost 3 vmrghw <4,4,4,4>, <6,6,6,6>
+ 1234832694U, // <4,6,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 1234832695U, // <4,6,4,u>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 2242302241U, // <4,6,5,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2242310567U, // <4,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1168568826U, // <4,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2242302514U, // <4,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2242302605U, // <4,6,5,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2242310891U, // <4,6,5,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1168569144U, // <4,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222233398U, // <4,6,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 1222233399U, // <4,6,5,u>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 3316576545U, // <4,6,6,0>: Cost 4 vmrghw <4,6,5,0>, <6,0,1,2>
+ 3316584871U, // <4,6,6,1>: Cost 4 vmrghw <4,6,5,1>, <6,1,7,1>
+ 2242851322U, // <4,6,6,2>: Cost 3 vmrghw <4,6,5,2>, <6,2,7,3>
+ 3316601394U, // <4,6,6,3>: Cost 4 vmrghw <4,6,5,3>, <6,3,4,5>
+ 3852890916U, // <4,6,6,4>: Cost 4 vsldoi12 <4,5,6,4>, <6,6,4,4>
+ 3316617963U, // <4,6,6,5>: Cost 4 vmrghw <4,6,5,5>, <6,5,7,1>
+ 2242884408U, // <4,6,6,6>: Cost 3 vmrghw <4,6,5,6>, <6,6,6,6>
+ 2785858370U, // <4,6,6,7>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,7,7>
+ 2785858379U, // <4,6,6,u>: Cost 3 vsldoi12 <5,6,7,4>, <6,6,u,7>
+ 2785858382U, // <4,6,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,0,1>
+ 3859600215U, // <4,6,7,1>: Cost 4 vsldoi12 <5,6,7,4>, <6,7,1,1>
+ 3317240314U, // <4,6,7,2>: Cost 4 vmrghw <4,7,5,0>, <6,2,7,3>
+ 2792199020U, // <4,6,7,3>: Cost 3 vsldoi12 <6,7,3,4>, <6,7,3,4>
+ 2785858422U, // <4,6,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <6,7,4,5>
+ 3856651132U, // <4,6,7,5>: Cost 4 vsldoi12 <5,2,3,4>, <6,7,5,2>
+ 3317240632U, // <4,6,7,6>: Cost 4 vmrghw <4,7,5,0>, <6,6,6,6>
+ 2303954230U, // <4,6,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303954231U, // <4,6,7,u>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2244292897U, // <4,6,u,0>: Cost 3 vmrghw RHS, <6,0,1,2>
+ 2244293031U, // <4,6,u,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1170551290U, // <4,6,u,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2244293170U, // <4,6,u,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2244293261U, // <4,6,u,4>: Cost 3 vmrghw RHS, <6,4,5,6>
+ 2244293355U, // <4,6,u,5>: Cost 3 vmrghw RHS, <6,5,7,1>
+ 1170551608U, // <4,6,u,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1222257974U, // <4,6,u,7>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 1222257975U, // <4,6,u,u>: Cost 2 vmrglw <2,3,4,u>, RHS
+ 2238862330U, // <4,7,0,0>: Cost 3 vmrghw <4,0,5,1>, <7,0,1,2>
+ 2706604134U, // <4,7,0,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3312604308U, // <4,7,0,2>: Cost 4 vmrghw <4,0,5,1>, <7,2,0,3>
+ 3768402176U, // <4,7,0,3>: Cost 4 vsldoi8 <1,6,4,7>, <0,3,1,4>
+ 2238862648U, // <4,7,0,4>: Cost 3 vmrghw <4,0,5,1>, <7,4,0,5>
+ 3859600418U, // <4,7,0,5>: Cost 4 vsldoi12 <5,6,7,4>, <7,0,5,6>
+ 3729994393U, // <4,7,0,6>: Cost 4 vsldoi4 <6,4,7,0>, <6,4,7,0>
+ 2238862956U, // <4,7,0,7>: Cost 3 vmrghw <4,0,5,1>, <7,7,7,7>
+ 2706604701U, // <4,7,0,u>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 3385610338U, // <4,7,1,0>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,0>
+ 3780346676U, // <4,7,1,1>: Cost 4 vsldoi8 <3,6,4,7>, <1,1,1,1>
+ 2706604954U, // <4,7,1,2>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3385610746U, // <4,7,1,3>: Cost 4 vmrglw <5,0,4,1>, <6,2,7,3>
+ 3385610342U, // <4,7,1,4>: Cost 4 vmrglw <5,0,4,1>, <5,6,7,4>
+ 3385610667U, // <4,7,1,5>: Cost 4 vmrglw <5,0,4,1>, <6,1,7,5>
+ 3768403178U, // <4,7,1,6>: Cost 4 vsldoi8 <1,6,4,7>, <1,6,4,7>
+ 3385611074U, // <4,7,1,7>: Cost 4 vmrglw <5,0,4,1>, <6,6,7,7>
+ 2706604954U, // <4,7,1,u>: Cost 3 vsldoi8 <3,6,4,7>, <1,2,3,4>
+ 3859600532U, // <4,7,2,0>: Cost 4 vsldoi12 <5,6,7,4>, <7,2,0,3>
+ 3712091034U, // <4,7,2,1>: Cost 5 vsldoi4 <3,4,7,2>, <1,2,3,4>
+ 3774375528U, // <4,7,2,2>: Cost 4 vsldoi8 <2,6,4,7>, <2,2,2,2>
+ 2794853552U, // <4,7,2,3>: Cost 3 vsldoi12 <7,2,3,4>, <7,2,3,4>
+ 2785858744U, // <4,7,2,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,2,4,3>
+ 3735982182U, // <4,7,2,5>: Cost 4 vsldoi4 <7,4,7,2>, <5,6,7,4>
+ 3774375875U, // <4,7,2,6>: Cost 4 vsldoi8 <2,6,4,7>, <2,6,4,7>
+ 3735983476U, // <4,7,2,7>: Cost 4 vsldoi4 <7,4,7,2>, <7,4,7,2>
+ 2795222237U, // <4,7,2,u>: Cost 3 vsldoi12 <7,2,u,4>, <7,2,u,4>
+ 3780348054U, // <4,7,3,0>: Cost 4 vsldoi8 <3,6,4,7>, <3,0,1,2>
+ 3730015130U, // <4,7,3,1>: Cost 4 vsldoi4 <6,4,7,3>, <1,2,3,4>
+ 3780348244U, // <4,7,3,2>: Cost 4 vsldoi8 <3,6,4,7>, <3,2,4,3>
+ 3778357673U, // <4,7,3,3>: Cost 4 vsldoi8 <3,3,4,7>, <3,3,4,7>
+ 2325155942U, // <4,7,3,4>: Cost 3 vmrglw <7,2,4,3>, <5,6,7,4>
+ 3779684939U, // <4,7,3,5>: Cost 5 vsldoi8 <3,5,4,7>, <3,5,4,7>
+ 2706606748U, // <4,7,3,6>: Cost 3 vsldoi8 <3,6,4,7>, <3,6,4,7>
+ 3398898498U, // <4,7,3,7>: Cost 4 vmrglw <7,2,4,3>, <6,6,7,7>
+ 2707934014U, // <4,7,3,u>: Cost 3 vsldoi8 <3,u,4,7>, <3,u,4,7>
+ 2785858868U, // <4,7,4,0>: Cost 3 vsldoi12 <5,6,7,4>, <7,4,0,1>
+ 3780348874U, // <4,7,4,1>: Cost 4 vsldoi8 <3,6,4,7>, <4,1,2,3>
+ 3780349000U, // <4,7,4,2>: Cost 4 vsldoi8 <3,6,4,7>, <4,2,7,3>
+ 2308575738U, // <4,7,4,3>: Cost 3 vmrglw <4,4,4,4>, <6,2,7,3>
+ 2656283856U, // <4,7,4,4>: Cost 3 vsldoi4 <6,4,7,4>, <4,4,4,4>
+ 2706607414U, // <4,7,4,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2656285341U, // <4,7,4,6>: Cost 3 vsldoi4 <6,4,7,4>, <6,4,7,4>
+ 2241468012U, // <4,7,4,7>: Cost 3 vmrghw <4,4,4,4>, <7,7,7,7>
+ 2706607657U, // <4,7,4,u>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 1168569338U, // <4,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2242311242U, // <4,7,5,1>: Cost 3 vmrghw RHS, <7,1,1,1>
+ 2242303178U, // <4,7,5,2>: Cost 3 vmrghw RHS, <7,2,6,3>
+ 2242311395U, // <4,7,5,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1168569702U, // <4,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2242311606U, // <4,7,5,5>: Cost 3 vmrghw RHS, <7,5,5,5>
+ 2242311662U, // <4,7,5,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1168569964U, // <4,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1168569986U, // <4,7,5,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 3316593658U, // <4,7,6,0>: Cost 4 vmrghw <4,6,5,2>, <7,0,1,2>
+ 3316593738U, // <4,7,6,1>: Cost 5 vmrghw <4,6,5,2>, <7,1,1,1>
+ 3316634800U, // <4,7,6,2>: Cost 4 vmrghw <4,6,5,7>, <7,2,3,4>
+ 3386978810U, // <4,7,6,3>: Cost 4 vmrglw <5,2,4,6>, <6,2,7,3>
+ 2785859072U, // <4,7,6,4>: Cost 3 vsldoi12 <5,6,7,4>, <7,6,4,7>
+ 3736014950U, // <4,7,6,5>: Cost 4 vsldoi4 <7,4,7,6>, <5,6,7,4>
+ 3316594158U, // <4,7,6,6>: Cost 4 vmrghw <4,6,5,2>, <7,6,2,7>
+ 2797803032U, // <4,7,6,7>: Cost 3 vsldoi12 <7,6,7,4>, <7,6,7,4>
+ 2797876769U, // <4,7,6,u>: Cost 3 vsldoi12 <7,6,u,4>, <7,6,u,4>
+ 2243499002U, // <4,7,7,0>: Cost 3 vmrghw <4,7,5,0>, <7,0,1,2>
+ 3718103962U, // <4,7,7,1>: Cost 4 vsldoi4 <4,4,7,7>, <1,2,3,4>
+ 3317257418U, // <4,7,7,2>: Cost 4 vmrghw <4,7,5,2>, <7,2,6,3>
+ 3377695816U, // <4,7,7,3>: Cost 4 vmrglw <3,6,4,7>, <4,2,7,3>
+ 2243532134U, // <4,7,7,4>: Cost 3 vmrghw <4,7,5,4>, <7,4,5,6>
+ 3317282230U, // <4,7,7,5>: Cost 4 vmrghw <4,7,5,5>, <7,5,5,5>
+ 2730497536U, // <4,7,7,6>: Cost 3 vsldoi8 <7,6,4,7>, <7,6,4,7>
+ 2243556972U, // <4,7,7,7>: Cost 3 vmrghw <4,7,5,7>, <7,7,7,7>
+ 2243565186U, // <4,7,7,u>: Cost 3 vmrghw <4,7,5,u>, <7,u,1,2>
+ 1170551802U, // <4,7,u,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2706609966U, // <4,7,u,1>: Cost 3 vsldoi8 <3,6,4,7>, LHS
+ 2244293797U, // <4,7,u,2>: Cost 3 vmrghw RHS, <7,2,2,2>
+ 2244293859U, // <4,7,u,3>: Cost 3 vmrghw RHS, <7,3,0,1>
+ 1170552166U, // <4,7,u,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 2706610330U, // <4,7,u,5>: Cost 3 vsldoi8 <3,6,4,7>, RHS
+ 2244294126U, // <4,7,u,6>: Cost 3 vmrghw RHS, <7,6,2,7>
+ 1170552428U, // <4,7,u,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1170552450U, // <4,7,u,u>: Cost 2 vmrghw RHS, <7,u,1,2>
+ 1165118354U, // <4,u,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1624907878U, // <4,u,0,1>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638407377U, // <4,u,0,2>: Cost 3 vsldoi4 <3,4,u,0>, <2,3,4,u>
+ 2295931036U, // <4,u,0,3>: Cost 3 vmrglw <2,3,4,0>, LHS
+ 2687369584U, // <4,u,0,4>: Cost 3 vsldoi8 <0,4,4,u>, <0,4,4,u>
+ 1165121690U, // <4,u,0,5>: Cost 2 vmrghw <4,0,5,1>, RHS
+ 2662298489U, // <4,u,0,6>: Cost 3 vsldoi4 <7,4,u,0>, <6,7,4,u>
+ 2295934280U, // <4,u,0,7>: Cost 3 vmrglw <2,3,4,0>, RHS
+ 1624908445U, // <4,u,0,u>: Cost 2 vsldoi8 <2,3,4,u>, LHS
+ 2638413926U, // <4,u,1,0>: Cost 3 vsldoi4 <3,4,u,1>, LHS
+ 2691351382U, // <4,u,1,1>: Cost 3 vsldoi8 <1,1,4,u>, <1,1,4,u>
+ 1685280558U, // <4,u,1,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2287313052U, // <4,u,1,3>: Cost 3 vmrglw <0,u,4,1>, LHS
+ 2299257799U, // <4,u,1,4>: Cost 3 vmrglw <2,u,4,1>, <1,2,u,4>
+ 2694005914U, // <4,u,1,5>: Cost 3 vsldoi8 <1,5,4,u>, <1,5,4,u>
+ 2305231362U, // <4,u,1,6>: Cost 3 vmrglw <3,u,4,1>, <3,4,5,6>
+ 2287316296U, // <4,u,1,7>: Cost 3 vmrglw <0,u,4,1>, RHS
+ 1685280612U, // <4,u,1,u>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 2638422118U, // <4,u,2,0>: Cost 3 vsldoi4 <3,4,u,2>, LHS
+ 2240206638U, // <4,u,2,1>: Cost 3 vmrghw <4,2,5,3>, LHS
+ 2697987712U, // <4,u,2,2>: Cost 3 vsldoi8 <2,2,4,u>, <2,2,4,u>
+ 1624909521U, // <4,u,2,3>: Cost 2 vsldoi8 <2,3,4,u>, <2,3,4,u>
+ 2759391121U, // <4,u,2,4>: Cost 3 vsldoi12 <1,2,u,4>, <u,2,4,3>
+ 2240207002U, // <4,u,2,5>: Cost 3 vmrghw <4,2,5,3>, RHS
+ 2698651578U, // <4,u,2,6>: Cost 3 vsldoi8 <2,3,4,u>, <2,6,3,7>
+ 2785859500U, // <4,u,2,7>: Cost 3 vsldoi12 <5,6,7,4>, <u,2,7,3>
+ 1628227686U, // <4,u,2,u>: Cost 2 vsldoi8 <2,u,4,u>, <2,u,4,u>
+ 2759022524U, // <4,u,3,0>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,0,1>
+ 2801342408U, // <4,u,3,1>: Cost 3 vsldoi12 <u,3,1,4>, <u,3,1,4>
+ 2703960409U, // <4,u,3,2>: Cost 3 vsldoi8 <3,2,4,u>, <3,2,4,u>
+ 2759022554U, // <4,u,3,3>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,3,4>
+ 2759022564U, // <4,u,3,4>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,4,5>
+ 2240845978U, // <4,u,3,5>: Cost 3 vmrghw <4,3,5,0>, RHS
+ 2706614941U, // <4,u,3,6>: Cost 3 vsldoi8 <3,6,4,u>, <3,6,4,u>
+ 2301267272U, // <4,u,3,7>: Cost 3 vmrglw <3,2,4,3>, RHS
+ 2759022596U, // <4,u,3,u>: Cost 3 vsldoi12 <1,2,3,4>, <u,3,u,1>
+ 1570668646U, // <4,u,4,0>: Cost 2 vsldoi4 <4,4,u,4>, LHS
+ 1167726382U, // <4,u,4,1>: Cost 2 vmrghw <4,4,4,4>, LHS
+ 2698652753U, // <4,u,4,2>: Cost 3 vsldoi8 <2,3,4,u>, <4,2,u,3>
+ 1234829468U, // <4,u,4,3>: Cost 2 vmrglw <4,4,4,4>, LHS
+ 229035318U, // <4,u,4,4>: Cost 1 vspltisw0 RHS
+ 1624911158U, // <4,u,4,5>: Cost 2 vsldoi8 <2,3,4,u>, RHS
+ 2698653081U, // <4,u,4,6>: Cost 3 vsldoi8 <2,3,4,u>, <4,6,u,7>
+ 1234832712U, // <4,u,4,7>: Cost 2 vmrglw <4,4,4,4>, RHS
+ 229035318U, // <4,u,4,u>: Cost 1 vspltisw0 RHS
+ 1168561875U, // <4,u,5,0>: Cost 2 vmrghw RHS, <u,0,1,2>
+ 94820142U, // <4,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1168562053U, // <4,u,5,2>: Cost 2 vmrghw RHS, <u,2,3,0>
+ 1222230172U, // <4,u,5,3>: Cost 2 vmrglw <2,3,4,5>, LHS
+ 1168562239U, // <4,u,5,4>: Cost 2 vmrghw RHS, <u,4,5,6>
+ 94820506U, // <4,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 1685280922U, // <4,u,5,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 1222233416U, // <4,u,5,7>: Cost 2 vmrglw <2,3,4,5>, RHS
+ 94820709U, // <4,u,5,u>: Cost 1 vmrghw RHS, LHS
+ 1564713062U, // <4,u,6,0>: Cost 2 vsldoi4 <3,4,u,6>, LHS
+ 2626511979U, // <4,u,6,1>: Cost 3 vsldoi4 <1,4,u,6>, <1,4,u,6>
+ 2632484676U, // <4,u,6,2>: Cost 3 vsldoi4 <2,4,u,6>, <2,4,u,6>
+ 1564715549U, // <4,u,6,3>: Cost 2 vsldoi4 <3,4,u,6>, <3,4,u,6>
+ 1564716342U, // <4,u,6,4>: Cost 2 vsldoi4 <3,4,u,6>, RHS
+ 2242853018U, // <4,u,6,5>: Cost 3 vmrghw <4,6,5,2>, RHS
+ 2656375464U, // <4,u,6,6>: Cost 3 vsldoi4 <6,4,u,6>, <6,4,u,6>
+ 27705344U, // <4,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,6,u>: Cost 0 copy RHS
+ 2785859840U, // <4,u,7,0>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,0,1>
+ 2243499822U, // <4,u,7,1>: Cost 3 vmrghw <4,7,5,0>, LHS
+ 2727851197U, // <4,u,7,2>: Cost 3 vsldoi8 <7,2,4,u>, <7,2,4,u>
+ 2303951004U, // <4,u,7,3>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 2785859880U, // <4,u,7,4>: Cost 3 vsldoi12 <5,6,7,4>, <u,7,4,5>
+ 2243500186U, // <4,u,7,5>: Cost 3 vmrghw <4,7,5,0>, RHS
+ 2730505729U, // <4,u,7,6>: Cost 3 vsldoi8 <7,6,4,u>, <7,6,4,u>
+ 2303954248U, // <4,u,7,7>: Cost 3 vmrglw <3,6,4,7>, RHS
+ 2303951009U, // <4,u,7,u>: Cost 3 vmrglw <3,6,4,7>, LHS
+ 1564729446U, // <4,u,u,0>: Cost 2 vsldoi4 <3,4,u,u>, LHS
+ 96810798U, // <4,u,u,1>: Cost 1 vmrghw RHS, LHS
+ 1685281125U, // <4,u,u,2>: Cost 2 vsldoi12 <1,2,3,4>, LHS
+ 1222254748U, // <4,u,u,3>: Cost 2 vmrglw <2,3,4,u>, LHS
+ 229035318U, // <4,u,u,4>: Cost 1 vspltisw0 RHS
+ 96811162U, // <4,u,u,5>: Cost 1 vmrghw RHS, RHS
+ 1685281165U, // <4,u,u,6>: Cost 2 vsldoi12 <1,2,3,4>, RHS
+ 27705344U, // <4,u,u,7>: Cost 0 copy RHS
+ 27705344U, // <4,u,u,u>: Cost 0 copy RHS
+ 2754232320U, // <5,0,0,0>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,0,0>
+ 2754232330U, // <5,0,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,1,1>
+ 3718194894U, // <5,0,0,2>: Cost 4 vsldoi4 <4,5,0,0>, <2,3,4,5>
+ 3376385762U, // <5,0,0,3>: Cost 4 vmrglw <3,4,5,0>, <5,2,0,3>
+ 2754232357U, // <5,0,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,0,4,1>
+ 3845816370U, // <5,0,0,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,0,5,5>
+ 3782353389U, // <5,0,0,6>: Cost 4 vsldoi8 <4,0,5,0>, <0,6,0,7>
+ 3376386090U, // <5,0,0,7>: Cost 4 vmrglw <3,4,5,0>, <5,6,0,7>
+ 2757402697U, // <5,0,0,u>: Cost 3 vsldoi12 <0,u,u,5>, <0,0,u,1>
+ 2626543718U, // <5,0,1,0>: Cost 3 vsldoi4 <1,5,0,1>, LHS
+ 2626544751U, // <5,0,1,1>: Cost 3 vsldoi4 <1,5,0,1>, <1,5,0,1>
+ 1680490598U, // <5,0,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3766428665U, // <5,0,1,3>: Cost 4 vsldoi8 <1,3,5,0>, <1,3,5,0>
+ 2626546998U, // <5,0,1,4>: Cost 3 vsldoi4 <1,5,0,1>, RHS
+ 2650435539U, // <5,0,1,5>: Cost 3 vsldoi4 <5,5,0,1>, <5,5,0,1>
+ 3783017715U, // <5,0,1,6>: Cost 4 vsldoi8 <4,1,5,0>, <1,6,5,7>
+ 3385019000U, // <5,0,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,0,7>
+ 1680490652U, // <5,0,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 3376398336U, // <5,0,2,0>: Cost 4 vmrglw <3,4,5,2>, <0,0,0,0>
+ 2245877862U, // <5,0,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 3773064808U, // <5,0,2,2>: Cost 4 vsldoi8 <2,4,5,0>, <2,2,2,2>
+ 2705295054U, // <5,0,2,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 3827974343U, // <5,0,2,4>: Cost 4 vsldoi12 <0,4,1,5>, <0,2,4,1>
+ 3845816530U, // <5,0,2,5>: Cost 4 vsldoi12 <3,4,0,5>, <0,2,5,3>
+ 3779037114U, // <5,0,2,6>: Cost 4 vsldoi8 <3,4,5,0>, <2,6,3,7>
+ 3810887658U, // <5,0,2,7>: Cost 4 vsldoi8 <u,7,5,0>, <2,7,0,1>
+ 2245878429U, // <5,0,2,u>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2710603926U, // <5,0,3,0>: Cost 3 vsldoi8 <4,3,5,0>, <3,0,1,2>
+ 3827974396U, // <5,0,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <0,3,1,0>
+ 3779037516U, // <5,0,3,2>: Cost 4 vsldoi8 <3,4,5,0>, <3,2,3,4>
+ 3779037596U, // <5,0,3,3>: Cost 4 vsldoi8 <3,4,5,0>, <3,3,3,3>
+ 2705295868U, // <5,0,3,4>: Cost 3 vsldoi8 <3,4,5,0>, <3,4,5,0>
+ 3379726804U, // <5,0,3,5>: Cost 4 vmrglw <4,0,5,3>, <3,4,0,5>
+ 3802925748U, // <5,0,3,6>: Cost 4 vsldoi8 <7,4,5,0>, <3,6,7,4>
+ 3363138168U, // <5,0,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,0,7>
+ 2707950400U, // <5,0,3,u>: Cost 3 vsldoi8 <3,u,5,0>, <3,u,5,0>
+ 2626568294U, // <5,0,4,0>: Cost 3 vsldoi4 <1,5,0,4>, LHS
+ 1680490834U, // <5,0,4,1>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 3828048219U, // <5,0,4,2>: Cost 4 vsldoi12 <0,4,2,5>, <0,4,2,5>
+ 2710604932U, // <5,0,4,3>: Cost 3 vsldoi8 <4,3,5,0>, <4,3,5,0>
+ 2754232685U, // <5,0,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,4,4,5>
+ 2705296694U, // <5,0,4,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779038590U, // <5,0,4,6>: Cost 4 vsldoi8 <3,4,5,0>, <4,6,5,7>
+ 2713259464U, // <5,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1680490834U, // <5,0,4,u>: Cost 2 vsldoi12 <0,4,1,5>, <0,4,1,5>
+ 2311307264U, // <5,0,5,0>: Cost 3 vmrglw <4,u,5,5>, <0,0,0,0>
+ 1174437990U, // <5,0,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 3779038946U, // <5,0,5,2>: Cost 4 vsldoi8 <3,4,5,0>, <5,2,0,3>
+ 3845816752U, // <5,0,5,3>: Cost 4 vsldoi12 <3,4,0,5>, <0,5,3,0>
+ 2248180050U, // <5,0,5,4>: Cost 3 vmrghw <5,5,5,5>, <0,4,1,5>
+ 2248180194U, // <5,0,5,5>: Cost 3 vmrghw <5,5,5,5>, <0,5,u,5>
+ 3779039274U, // <5,0,5,6>: Cost 4 vsldoi8 <3,4,5,0>, <5,6,0,7>
+ 3385051768U, // <5,0,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,0,7>
+ 1174438557U, // <5,0,5,u>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2302689280U, // <5,0,6,0>: Cost 3 vmrglw <3,4,5,6>, <0,0,0,0>
+ 1175208038U, // <5,0,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 3787002362U, // <5,0,6,2>: Cost 4 vsldoi8 <4,7,5,0>, <6,2,7,3>
+ 3376432160U, // <5,0,6,3>: Cost 4 vmrglw <3,4,5,6>, <1,4,0,3>
+ 2248950098U, // <5,0,6,4>: Cost 3 vmrghw <5,6,7,0>, <0,4,1,5>
+ 2248950180U, // <5,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 3376433702U, // <5,0,6,6>: Cost 4 vmrglw <3,4,5,6>, <3,5,0,6>
+ 2729186166U, // <5,0,6,7>: Cost 3 vsldoi8 <7,4,5,0>, <6,7,4,5>
+ 1175208605U, // <5,0,6,u>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2713261050U, // <5,0,7,0>: Cost 3 vsldoi8 <4,7,5,0>, <7,0,1,2>
+ 3365823599U, // <5,0,7,1>: Cost 4 vmrglw <1,6,5,7>, <1,5,0,1>
+ 3808900317U, // <5,0,7,2>: Cost 4 vsldoi8 <u,4,5,0>, <7,2,u,4>
+ 3784348899U, // <5,0,7,3>: Cost 4 vsldoi8 <4,3,5,0>, <7,3,0,1>
+ 2729186656U, // <5,0,7,4>: Cost 3 vsldoi8 <7,4,5,0>, <7,4,5,0>
+ 3787003268U, // <5,0,7,5>: Cost 4 vsldoi8 <4,7,5,0>, <7,5,0,0>
+ 3802928664U, // <5,0,7,6>: Cost 4 vsldoi8 <7,4,5,0>, <7,6,7,4>
+ 3787003431U, // <5,0,7,7>: Cost 4 vsldoi8 <4,7,5,0>, <7,7,0,1>
+ 2731841188U, // <5,0,7,u>: Cost 3 vsldoi8 <7,u,5,0>, <7,u,5,0>
+ 2626601062U, // <5,0,u,0>: Cost 3 vsldoi4 <1,5,0,u>, LHS
+ 1683145366U, // <5,0,u,1>: Cost 2 vsldoi12 <0,u,1,5>, <0,u,1,5>
+ 1680491165U, // <5,0,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2705295054U, // <5,0,u,3>: Cost 3 vsldoi8 <3,4,5,0>, <2,3,4,5>
+ 2754233005U, // <5,0,u,4>: Cost 3 vsldoi12 <0,4,1,5>, <0,u,4,1>
+ 2705299610U, // <5,0,u,5>: Cost 3 vsldoi8 <3,4,5,0>, RHS
+ 3779041488U, // <5,0,u,6>: Cost 4 vsldoi8 <3,4,5,0>, <u,6,3,7>
+ 2737150252U, // <5,0,u,7>: Cost 3 vsldoi8 <u,7,5,0>, <u,7,5,0>
+ 1680491219U, // <5,0,u,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2713927680U, // <5,1,0,0>: Cost 3 vsldoi8 <4,u,5,1>, <0,0,0,0>
+ 1640185958U, // <5,1,0,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2310607866U, // <5,1,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 3787669756U, // <5,1,0,3>: Cost 4 vsldoi8 <4,u,5,1>, <0,3,1,0>
+ 2713928018U, // <5,1,0,4>: Cost 3 vsldoi8 <4,u,5,1>, <0,4,1,5>
+ 2306621778U, // <5,1,0,5>: Cost 3 vmrglw <4,1,5,0>, <0,4,1,5>
+ 3787670006U, // <5,1,0,6>: Cost 4 vsldoi8 <4,u,5,1>, <0,6,1,7>
+ 3736188301U, // <5,1,0,7>: Cost 4 vsldoi4 <7,5,1,0>, <7,5,1,0>
+ 1640186525U, // <5,1,0,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2650505318U, // <5,1,1,0>: Cost 3 vsldoi4 <5,5,1,1>, LHS
+ 2754233140U, // <5,1,1,1>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,1,1>
+ 2311276694U, // <5,1,1,2>: Cost 3 vmrglw <4,u,5,1>, <3,0,1,2>
+ 2311278315U, // <5,1,1,3>: Cost 3 vmrglw <4,u,5,1>, <5,2,1,3>
+ 2758435667U, // <5,1,1,4>: Cost 3 vsldoi12 <1,1,4,5>, <1,1,4,5>
+ 2754233180U, // <5,1,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,1,5,5>
+ 3385016497U, // <5,1,1,6>: Cost 4 vmrglw <4,u,5,1>, <0,2,1,6>
+ 2311278643U, // <5,1,1,7>: Cost 3 vmrglw <4,u,5,1>, <5,6,1,7>
+ 2758730615U, // <5,1,1,u>: Cost 3 vsldoi12 <1,1,u,5>, <1,1,u,5>
+ 3700367462U, // <5,1,2,0>: Cost 4 vsldoi4 <1,5,1,2>, LHS
+ 3830629255U, // <5,1,2,1>: Cost 4 vsldoi12 <0,u,1,5>, <1,2,1,3>
+ 2713929320U, // <5,1,2,2>: Cost 3 vsldoi8 <4,u,5,1>, <2,2,2,2>
+ 2754233238U, // <5,1,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,3,0>
+ 2759099300U, // <5,1,2,4>: Cost 3 vsldoi12 <1,2,4,5>, <1,2,4,5>
+ 2754233259U, // <5,1,2,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,5,3>
+ 2713929658U, // <5,1,2,6>: Cost 3 vsldoi8 <4,u,5,1>, <2,6,3,7>
+ 3872359354U, // <5,1,2,7>: Cost 4 vsldoi12 <7,u,0,5>, <1,2,7,0>
+ 2754233283U, // <5,1,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,2,u,0>
+ 2713929878U, // <5,1,3,0>: Cost 3 vsldoi8 <4,u,5,1>, <3,0,1,2>
+ 3363135498U, // <5,1,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,1,1>
+ 3363137686U, // <5,1,3,2>: Cost 4 vmrglw <1,2,5,3>, <3,0,1,2>
+ 2713930140U, // <5,1,3,3>: Cost 3 vsldoi8 <4,u,5,1>, <3,3,3,3>
+ 2713930242U, // <5,1,3,4>: Cost 3 vsldoi8 <4,u,5,1>, <3,4,5,6>
+ 2289394002U, // <5,1,3,5>: Cost 3 vmrglw <1,2,5,3>, <0,4,1,5>
+ 3787672184U, // <5,1,3,6>: Cost 4 vsldoi8 <4,u,5,1>, <3,6,0,7>
+ 3787672259U, // <5,1,3,7>: Cost 4 vsldoi8 <4,u,5,1>, <3,7,0,1>
+ 2713930526U, // <5,1,3,u>: Cost 3 vsldoi8 <4,u,5,1>, <3,u,1,2>
+ 1634880402U, // <5,1,4,0>: Cost 2 vsldoi8 <4,0,5,1>, <4,0,5,1>
+ 2760205355U, // <5,1,4,1>: Cost 3 vsldoi12 <1,4,1,5>, <1,4,1,5>
+ 2760279092U, // <5,1,4,2>: Cost 3 vsldoi12 <1,4,2,5>, <1,4,2,5>
+ 3787672708U, // <5,1,4,3>: Cost 4 vsldoi8 <4,u,5,1>, <4,3,5,0>
+ 2713930960U, // <5,1,4,4>: Cost 3 vsldoi8 <4,u,5,1>, <4,4,4,4>
+ 1640189238U, // <5,1,4,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 3786345848U, // <5,1,4,6>: Cost 4 vsldoi8 <4,6,5,1>, <4,6,5,1>
+ 3787009481U, // <5,1,4,7>: Cost 4 vsldoi8 <4,7,5,1>, <4,7,5,1>
+ 1640189466U, // <5,1,4,u>: Cost 2 vsldoi8 <4,u,5,1>, <4,u,5,1>
+ 2754233455U, // <5,1,5,0>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,0,1>
+ 2713931407U, // <5,1,5,1>: Cost 3 vsldoi8 <4,u,5,1>, <5,1,0,1>
+ 2713931499U, // <5,1,5,2>: Cost 3 vsldoi8 <4,u,5,1>, <5,2,1,3>
+ 3827975305U, // <5,1,5,3>: Cost 4 vsldoi12 <0,4,1,5>, <1,5,3,0>
+ 2754233495U, // <5,1,5,4>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,4,5>
+ 2288746834U, // <5,1,5,5>: Cost 3 vmrglw <1,1,5,5>, <0,4,1,5>
+ 2713931827U, // <5,1,5,6>: Cost 3 vsldoi8 <4,u,5,1>, <5,6,1,7>
+ 3787673725U, // <5,1,5,7>: Cost 4 vsldoi8 <4,u,5,1>, <5,7,1,0>
+ 2754233527U, // <5,1,5,u>: Cost 3 vsldoi12 <0,4,1,5>, <1,5,u,1>
+ 2668462182U, // <5,1,6,0>: Cost 3 vsldoi4 <u,5,1,6>, LHS
+ 2290746002U, // <5,1,6,1>: Cost 3 vmrglw <1,4,5,6>, <0,u,1,1>
+ 2302691478U, // <5,1,6,2>: Cost 3 vmrglw <3,4,5,6>, <3,0,1,2>
+ 3364488071U, // <5,1,6,3>: Cost 4 vmrglw <1,4,5,6>, <1,2,1,3>
+ 2302689536U, // <5,1,6,4>: Cost 3 vmrglw <3,4,5,6>, <0,3,1,4>
+ 2754233587U, // <5,1,6,5>: Cost 3 vsldoi12 <0,4,1,5>, <1,6,5,7>
+ 2713932600U, // <5,1,6,6>: Cost 3 vsldoi8 <4,u,5,1>, <6,6,6,6>
+ 2713932622U, // <5,1,6,7>: Cost 3 vsldoi8 <4,u,5,1>, <6,7,0,1>
+ 2302689297U, // <5,1,6,u>: Cost 3 vmrglw <3,4,5,6>, <0,0,1,u>
+ 2713932794U, // <5,1,7,0>: Cost 3 vsldoi8 <4,u,5,1>, <7,0,1,2>
+ 3365822474U, // <5,1,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,1,1>
+ 3365824662U, // <5,1,7,2>: Cost 4 vmrglw <1,6,5,7>, <3,0,1,2>
+ 3787674851U, // <5,1,7,3>: Cost 4 vsldoi8 <4,u,5,1>, <7,3,0,1>
+ 2713933158U, // <5,1,7,4>: Cost 3 vsldoi8 <4,u,5,1>, <7,4,5,6>
+ 2292080978U, // <5,1,7,5>: Cost 3 vmrglw <1,6,5,7>, <0,4,1,5>
+ 3365823613U, // <5,1,7,6>: Cost 4 vmrglw <1,6,5,7>, <1,5,1,6>
+ 2713933420U, // <5,1,7,7>: Cost 3 vsldoi8 <4,u,5,1>, <7,7,7,7>
+ 2713933442U, // <5,1,7,u>: Cost 3 vsldoi8 <4,u,5,1>, <7,u,1,2>
+ 1658771190U, // <5,1,u,0>: Cost 2 vsldoi8 <u,0,5,1>, <u,0,5,1>
+ 1640191790U, // <5,1,u,1>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 2762933624U, // <5,1,u,2>: Cost 3 vsldoi12 <1,u,2,5>, <1,u,2,5>
+ 2754233724U, // <5,1,u,3>: Cost 3 vsldoi12 <0,4,1,5>, <1,u,3,0>
+ 2763081098U, // <5,1,u,4>: Cost 3 vsldoi12 <1,u,4,5>, <1,u,4,5>
+ 1640192154U, // <5,1,u,5>: Cost 2 vsldoi8 <4,u,5,1>, RHS
+ 2713934032U, // <5,1,u,6>: Cost 3 vsldoi8 <4,u,5,1>, <u,6,3,7>
+ 2713934080U, // <5,1,u,7>: Cost 3 vsldoi8 <4,u,5,1>, <u,7,0,1>
+ 1640192357U, // <5,1,u,u>: Cost 2 vsldoi8 <4,u,5,1>, LHS
+ 3779051520U, // <5,2,0,0>: Cost 4 vsldoi8 <3,4,5,2>, <0,0,0,0>
+ 2705309798U, // <5,2,0,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 3838813637U, // <5,2,0,2>: Cost 4 vsldoi12 <2,2,4,5>, <2,0,2,1>
+ 2302640230U, // <5,2,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3765117266U, // <5,2,0,4>: Cost 4 vsldoi8 <1,1,5,2>, <0,4,1,5>
+ 3381027892U, // <5,2,0,5>: Cost 4 vmrglw <4,2,5,0>, <1,4,2,5>
+ 3842794985U, // <5,2,0,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,0,6,1>
+ 3408232554U, // <5,2,0,7>: Cost 4 vmrglw <u,7,5,0>, <0,1,2,7>
+ 2302640235U, // <5,2,0,u>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 3700432998U, // <5,2,1,0>: Cost 4 vsldoi4 <1,5,2,1>, LHS
+ 3765117785U, // <5,2,1,1>: Cost 4 vsldoi8 <1,1,5,2>, <1,1,5,2>
+ 2311276136U, // <5,2,1,2>: Cost 3 vmrglw <4,u,5,1>, <2,2,2,2>
+ 1237532774U, // <5,2,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700436278U, // <5,2,1,4>: Cost 4 vsldoi4 <1,5,2,1>, RHS
+ 3381036084U, // <5,2,1,5>: Cost 4 vmrglw <4,2,5,1>, <1,4,2,5>
+ 3385018045U, // <5,2,1,6>: Cost 4 vmrglw <4,u,5,1>, <2,3,2,6>
+ 3385017560U, // <5,2,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,2,7>
+ 1237532779U, // <5,2,1,u>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 3700441190U, // <5,2,2,0>: Cost 4 vsldoi4 <1,5,2,2>, LHS
+ 3700442242U, // <5,2,2,1>: Cost 4 vsldoi4 <1,5,2,2>, <1,5,2,2>
+ 2754233960U, // <5,2,2,2>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,2,2>
+ 2754233970U, // <5,2,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,3,3>
+ 2765071997U, // <5,2,2,4>: Cost 3 vsldoi12 <2,2,4,5>, <2,2,4,5>
+ 3834021508U, // <5,2,2,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,2,5,3>
+ 3842795152U, // <5,2,2,6>: Cost 4 vsldoi12 <2,u,4,5>, <2,2,6,6>
+ 3376402492U, // <5,2,2,7>: Cost 4 vmrglw <3,4,5,2>, <5,6,2,7>
+ 2754234015U, // <5,2,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <2,2,u,3>
+ 2754234022U, // <5,2,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,3,0,1>
+ 3827975855U, // <5,2,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <2,3,1,1>
+ 2644625102U, // <5,2,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393766U, // <5,2,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1691993806U, // <5,2,3,4>: Cost 2 vsldoi12 <2,3,4,5>, <2,3,4,5>
+ 2785052375U, // <5,2,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <2,3,5,5>
+ 3854812897U, // <5,2,3,6>: Cost 4 vsldoi12 <4,u,5,5>, <2,3,6,6>
+ 3802942187U, // <5,2,3,7>: Cost 4 vsldoi8 <7,4,5,2>, <3,7,4,5>
+ 1692288754U, // <5,2,3,u>: Cost 2 vsldoi12 <2,3,u,5>, <2,3,u,5>
+ 3839846139U, // <5,2,4,0>: Cost 4 vsldoi12 <2,4,0,5>, <2,4,0,5>
+ 2709294052U, // <5,2,4,1>: Cost 3 vsldoi8 <4,1,5,2>, <4,1,5,2>
+ 2766251789U, // <5,2,4,2>: Cost 3 vsldoi12 <2,4,2,5>, <2,4,2,5>
+ 2765735702U, // <5,2,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,3,5>
+ 3840141087U, // <5,2,4,4>: Cost 4 vsldoi12 <2,4,4,5>, <2,4,4,5>
+ 2705313078U, // <5,2,4,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2712612217U, // <5,2,4,6>: Cost 3 vsldoi8 <4,6,5,2>, <4,6,5,2>
+ 3787017674U, // <5,2,4,7>: Cost 4 vsldoi8 <4,7,5,2>, <4,7,5,2>
+ 2765735747U, // <5,2,4,u>: Cost 3 vsldoi12 <2,3,4,5>, <2,4,u,5>
+ 3834021704U, // <5,2,5,0>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,0,1>
+ 3834021714U, // <5,2,5,1>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,1,2>
+ 2311308904U, // <5,2,5,2>: Cost 3 vmrglw <4,u,5,5>, <2,2,2,2>
+ 1237565542U, // <5,2,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3834021744U, // <5,2,5,4>: Cost 4 vsldoi12 <1,4,2,5>, <2,5,4,5>
+ 3369124916U, // <5,2,5,5>: Cost 4 vmrglw <2,2,5,5>, <1,4,2,5>
+ 2248181690U, // <5,2,5,6>: Cost 3 vmrghw <5,5,5,5>, <2,6,3,7>
+ 3786354825U, // <5,2,5,7>: Cost 4 vsldoi8 <4,6,5,2>, <5,7,2,3>
+ 1237565547U, // <5,2,5,u>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 3700473958U, // <5,2,6,0>: Cost 4 vsldoi4 <1,5,2,6>, LHS
+ 3700475014U, // <5,2,6,1>: Cost 4 vsldoi4 <1,5,2,6>, <1,5,2,6>
+ 2296718952U, // <5,2,6,2>: Cost 3 vmrglw <2,4,5,6>, <2,2,2,2>
+ 1228947558U, // <5,2,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3700477238U, // <5,2,6,4>: Cost 4 vsldoi4 <1,5,2,6>, RHS
+ 3834021836U, // <5,2,6,5>: Cost 4 vsldoi12 <1,4,2,5>, <2,6,5,7>
+ 2248951738U, // <5,2,6,6>: Cost 3 vmrghw <5,6,7,0>, <2,6,3,7>
+ 3370461105U, // <5,2,6,7>: Cost 4 vmrglw <2,4,5,6>, <2,6,2,7>
+ 1228947563U, // <5,2,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 3786355706U, // <5,2,7,0>: Cost 4 vsldoi8 <4,6,5,2>, <7,0,1,2>
+ 3783038037U, // <5,2,7,1>: Cost 4 vsldoi8 <4,1,5,2>, <7,1,2,3>
+ 3365824104U, // <5,2,7,2>: Cost 4 vmrglw <1,6,5,7>, <2,2,2,2>
+ 2292080742U, // <5,2,7,3>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 3842131986U, // <5,2,7,4>: Cost 4 vsldoi12 <2,7,4,5>, <2,7,4,5>
+ 3371795508U, // <5,2,7,5>: Cost 4 vmrglw <2,6,5,7>, <1,4,2,5>
+ 3786356206U, // <5,2,7,6>: Cost 4 vsldoi8 <4,6,5,2>, <7,6,2,7>
+ 3786356332U, // <5,2,7,7>: Cost 4 vsldoi8 <4,6,5,2>, <7,7,7,7>
+ 2292080747U, // <5,2,7,u>: Cost 3 vmrglw <1,6,5,7>, LHS
+ 2754234427U, // <5,2,u,0>: Cost 3 vsldoi12 <0,4,1,5>, <2,u,0,1>
+ 2705315630U, // <5,2,u,1>: Cost 3 vsldoi8 <3,4,5,2>, LHS
+ 2296735336U, // <5,2,u,2>: Cost 3 vmrglw <2,4,5,u>, <2,2,2,2>
+ 1228963942U, // <5,2,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 1695311971U, // <5,2,u,4>: Cost 2 vsldoi12 <2,u,4,5>, <2,u,4,5>
+ 2705315994U, // <5,2,u,5>: Cost 3 vsldoi8 <3,4,5,2>, RHS
+ 2769201269U, // <5,2,u,6>: Cost 3 vsldoi12 <2,u,6,5>, <2,u,6,5>
+ 3370477489U, // <5,2,u,7>: Cost 4 vmrglw <2,4,5,u>, <2,6,2,7>
+ 1695606919U, // <5,2,u,u>: Cost 2 vsldoi12 <2,u,u,5>, <2,u,u,5>
+ 3827976331U, // <5,3,0,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,0,0,0>
+ 2754234518U, // <5,3,0,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,1,2>
+ 3706472290U, // <5,3,0,2>: Cost 4 vsldoi4 <2,5,3,0>, <2,5,3,0>
+ 3700500630U, // <5,3,0,3>: Cost 4 vsldoi4 <1,5,3,0>, <3,0,1,2>
+ 2754234544U, // <5,3,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,4,1>
+ 3376383766U, // <5,3,0,5>: Cost 4 vmrglw <3,4,5,0>, <2,4,3,5>
+ 3769770513U, // <5,3,0,6>: Cost 5 vsldoi8 <1,u,5,3>, <0,6,4,7>
+ 3376383930U, // <5,3,0,7>: Cost 4 vmrglw <3,4,5,0>, <2,6,3,7>
+ 2754234581U, // <5,3,0,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,0,u,2>
+ 2311275414U, // <5,3,1,0>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,0>
+ 2305967971U, // <5,3,1,1>: Cost 3 vmrglw <4,0,5,1>, <2,5,3,1>
+ 2692047787U, // <5,3,1,2>: Cost 3 vsldoi8 <1,2,5,3>, <1,2,5,3>
+ 2311276146U, // <5,3,1,3>: Cost 3 vmrglw <4,u,5,1>, <2,2,3,3>
+ 2311275418U, // <5,3,1,4>: Cost 3 vmrglw <4,u,5,1>, <1,2,3,4>
+ 3765789807U, // <5,3,1,5>: Cost 4 vsldoi8 <1,2,5,3>, <1,5,0,1>
+ 3765789939U, // <5,3,1,6>: Cost 4 vsldoi8 <1,2,5,3>, <1,6,5,7>
+ 2311276474U, // <5,3,1,7>: Cost 3 vmrglw <4,u,5,1>, <2,6,3,7>
+ 2696029585U, // <5,3,1,u>: Cost 3 vsldoi8 <1,u,5,3>, <1,u,5,3>
+ 2311288709U, // <5,3,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 3765790243U, // <5,3,2,1>: Cost 4 vsldoi8 <1,2,5,3>, <2,1,3,5>
+ 3827976513U, // <5,3,2,2>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,2,2>
+ 2765736268U, // <5,3,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <3,2,3,4>
+ 2246248962U, // <5,3,2,4>: Cost 3 vmrghw <5,2,6,3>, <3,4,5,6>
+ 3765790563U, // <5,3,2,5>: Cost 4 vsldoi8 <1,2,5,3>, <2,5,3,1>
+ 3827976550U, // <5,3,2,6>: Cost 4 vsldoi12 <0,4,1,5>, <3,2,6,3>
+ 3842795887U, // <5,3,2,7>: Cost 4 vsldoi12 <2,u,4,5>, <3,2,7,3>
+ 2769054073U, // <5,3,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <3,2,u,4>
+ 3827976575U, // <5,3,3,0>: Cost 4 vsldoi12 <0,4,1,5>, <3,3,0,1>
+ 3765790963U, // <5,3,3,1>: Cost 4 vsldoi8 <1,2,5,3>, <3,1,2,5>
+ 3839478162U, // <5,3,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <3,3,2,2>
+ 2754234780U, // <5,3,3,3>: Cost 3 vsldoi12 <0,4,1,5>, <3,3,3,3>
+ 2771708327U, // <5,3,3,4>: Cost 3 vsldoi12 <3,3,4,5>, <3,3,4,5>
+ 3363137059U, // <5,3,3,5>: Cost 4 vmrglw <1,2,5,3>, <2,1,3,5>
+ 3375081320U, // <5,3,3,6>: Cost 4 vmrglw <3,2,5,3>, <2,5,3,6>
+ 3363137466U, // <5,3,3,7>: Cost 4 vmrglw <1,2,5,3>, <2,6,3,7>
+ 2772003275U, // <5,3,3,u>: Cost 3 vsldoi12 <3,3,u,5>, <3,3,u,5>
+ 2772077012U, // <5,3,4,0>: Cost 3 vsldoi12 <3,4,0,5>, <3,4,0,5>
+ 3765791714U, // <5,3,4,1>: Cost 4 vsldoi8 <1,2,5,3>, <4,1,5,0>
+ 2709965878U, // <5,3,4,2>: Cost 3 vsldoi8 <4,2,5,3>, <4,2,5,3>
+ 2772298223U, // <5,3,4,3>: Cost 3 vsldoi12 <3,4,3,5>, <3,4,3,5>
+ 2772371960U, // <5,3,4,4>: Cost 3 vsldoi12 <3,4,4,5>, <3,4,4,5>
+ 2754234882U, // <5,3,4,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,5,6>
+ 3839478282U, // <5,3,4,6>: Cost 4 vsldoi12 <2,3,4,5>, <3,4,6,5>
+ 3376416698U, // <5,3,4,7>: Cost 4 vmrglw <3,4,5,4>, <2,6,3,7>
+ 2754234909U, // <5,3,4,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,4,u,6>
+ 2311308182U, // <5,3,5,0>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,0>
+ 3765792421U, // <5,3,5,1>: Cost 4 vsldoi8 <1,2,5,3>, <5,1,2,5>
+ 2715938575U, // <5,3,5,2>: Cost 3 vsldoi8 <5,2,5,3>, <5,2,5,3>
+ 2311308914U, // <5,3,5,3>: Cost 3 vmrglw <4,u,5,5>, <2,2,3,3>
+ 2311308186U, // <5,3,5,4>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,4>
+ 2248182354U, // <5,3,5,5>: Cost 3 vmrghw <5,5,5,5>, <3,5,5,5>
+ 3765792837U, // <5,3,5,6>: Cost 4 vsldoi8 <1,2,5,3>, <5,6,3,7>
+ 2311309242U, // <5,3,5,7>: Cost 3 vmrglw <4,u,5,5>, <2,6,3,7>
+ 2311308190U, // <5,3,5,u>: Cost 3 vmrglw <4,u,5,5>, <1,2,3,u>
+ 2632777830U, // <5,3,6,0>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3706520372U, // <5,3,6,1>: Cost 4 vsldoi4 <2,5,3,6>, <1,1,1,1>
+ 2632779624U, // <5,3,6,2>: Cost 3 vsldoi4 <2,5,3,6>, <2,5,3,6>
+ 2632780290U, // <5,3,6,3>: Cost 3 vsldoi4 <2,5,3,6>, <3,4,5,6>
+ 2632781110U, // <5,3,6,4>: Cost 3 vsldoi4 <2,5,3,6>, RHS
+ 2248952413U, // <5,3,6,5>: Cost 3 vmrghw <5,6,7,0>, <3,5,6,7>
+ 2302691176U, // <5,3,6,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302691258U, // <5,3,6,7>: Cost 3 vmrglw <3,4,5,6>, <2,6,3,7>
+ 2632783662U, // <5,3,6,u>: Cost 3 vsldoi4 <2,5,3,6>, LHS
+ 3365823382U, // <5,3,7,0>: Cost 4 vmrglw <1,6,5,7>, <1,2,3,0>
+ 3706529011U, // <5,3,7,1>: Cost 4 vsldoi4 <2,5,3,7>, <1,6,5,7>
+ 3706529641U, // <5,3,7,2>: Cost 4 vsldoi4 <2,5,3,7>, <2,5,3,7>
+ 3365824114U, // <5,3,7,3>: Cost 4 vmrglw <1,6,5,7>, <2,2,3,3>
+ 2774362859U, // <5,3,7,4>: Cost 3 vsldoi12 <3,7,4,5>, <3,7,4,5>
+ 3365824035U, // <5,3,7,5>: Cost 4 vmrglw <1,6,5,7>, <2,1,3,5>
+ 3383740183U, // <5,3,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,4,3,6>
+ 3363833786U, // <5,3,7,7>: Cost 4 vmrglw <1,3,5,7>, <2,6,3,7>
+ 2774657807U, // <5,3,7,u>: Cost 3 vsldoi12 <3,7,u,5>, <3,7,u,5>
+ 2632794214U, // <5,3,u,0>: Cost 3 vsldoi4 <2,5,3,u>, LHS
+ 2754235166U, // <5,3,u,1>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,1,2>
+ 2632796010U, // <5,3,u,2>: Cost 3 vsldoi4 <2,5,3,u>, <2,5,3,u>
+ 2632796676U, // <5,3,u,3>: Cost 3 vsldoi4 <2,5,3,u>, <3,4,5,u>
+ 2632797494U, // <5,3,u,4>: Cost 3 vsldoi4 <2,5,3,u>, RHS
+ 2754235206U, // <5,3,u,5>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,5,6>
+ 2302691176U, // <5,3,u,6>: Cost 3 vmrglw <3,4,5,6>, <2,5,3,6>
+ 2302707642U, // <5,3,u,7>: Cost 3 vmrglw <3,4,5,u>, <2,6,3,7>
+ 2754235229U, // <5,3,u,u>: Cost 3 vsldoi12 <0,4,1,5>, <3,u,u,2>
+ 3765133325U, // <5,4,0,0>: Cost 4 vsldoi8 <1,1,5,4>, <0,0,1,4>
+ 2705326182U, // <5,4,0,1>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 3718489806U, // <5,4,0,2>: Cost 4 vsldoi4 <4,5,4,0>, <2,3,4,5>
+ 3718490624U, // <5,4,0,3>: Cost 4 vsldoi4 <4,5,4,0>, <3,4,5,4>
+ 2709307730U, // <5,4,0,4>: Cost 3 vsldoi8 <4,1,5,4>, <0,4,1,5>
+ 2302641870U, // <5,4,0,5>: Cost 3 vmrglw <3,4,5,0>, <2,3,4,5>
+ 3376383695U, // <5,4,0,6>: Cost 5 vmrglw <3,4,5,0>, <2,3,4,6>
+ 3384351018U, // <5,4,0,7>: Cost 4 vmrglw <4,7,5,0>, <u,7,4,7>
+ 2705326749U, // <5,4,0,u>: Cost 3 vsldoi8 <3,4,5,4>, LHS
+ 2305971057U, // <5,4,1,0>: Cost 3 vmrglw <4,0,5,1>, <6,7,4,0>
+ 3765134171U, // <5,4,1,1>: Cost 4 vsldoi8 <1,1,5,4>, <1,1,5,4>
+ 3766461338U, // <5,4,1,2>: Cost 4 vsldoi8 <1,3,5,4>, <1,2,3,4>
+ 3766461437U, // <5,4,1,3>: Cost 4 vsldoi8 <1,3,5,4>, <1,3,5,4>
+ 2311277776U, // <5,4,1,4>: Cost 3 vmrglw <4,u,5,1>, <4,4,4,4>
+ 2754235362U, // <5,4,1,5>: Cost 3 vsldoi12 <0,4,1,5>, <4,1,5,0>
+ 3783050483U, // <5,4,1,6>: Cost 4 vsldoi8 <4,1,5,4>, <1,6,5,7>
+ 3385019036U, // <5,4,1,7>: Cost 4 vmrglw <4,u,5,1>, <3,6,4,7>
+ 2311276241U, // <5,4,1,u>: Cost 3 vmrglw <4,u,5,1>, <2,3,4,u>
+ 3718504550U, // <5,4,2,0>: Cost 4 vsldoi4 <4,5,4,2>, LHS
+ 3783050787U, // <5,4,2,1>: Cost 4 vsldoi8 <4,1,5,4>, <2,1,3,5>
+ 3773097576U, // <5,4,2,2>: Cost 4 vsldoi8 <2,4,5,4>, <2,2,2,2>
+ 2705327822U, // <5,4,2,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 3773097767U, // <5,4,2,4>: Cost 4 vsldoi8 <2,4,5,4>, <2,4,5,4>
+ 2765737014U, // <5,4,2,5>: Cost 3 vsldoi12 <2,3,4,5>, <4,2,5,3>
+ 3779069882U, // <5,4,2,6>: Cost 4 vsldoi8 <3,4,5,4>, <2,6,3,7>
+ 3376401052U, // <5,4,2,7>: Cost 5 vmrglw <3,4,5,2>, <3,6,4,7>
+ 2245881370U, // <5,4,2,u>: Cost 3 vmrghw <5,2,1,3>, <4,u,5,1>
+ 3779070102U, // <5,4,3,0>: Cost 4 vsldoi8 <3,4,5,4>, <3,0,1,2>
+ 3363135525U, // <5,4,3,1>: Cost 4 vmrglw <1,2,5,3>, <0,0,4,1>
+ 3779070284U, // <5,4,3,2>: Cost 4 vsldoi8 <3,4,5,4>, <3,2,3,4>
+ 3779070364U, // <5,4,3,3>: Cost 4 vsldoi8 <3,4,5,4>, <3,3,3,3>
+ 2705328640U, // <5,4,3,4>: Cost 3 vsldoi8 <3,4,5,4>, <3,4,5,4>
+ 2307311310U, // <5,4,3,5>: Cost 3 vmrglw <4,2,5,3>, <2,3,4,5>
+ 3866021012U, // <5,4,3,6>: Cost 4 vsldoi12 <6,7,4,5>, <4,3,6,7>
+ 3363138204U, // <5,4,3,7>: Cost 5 vmrglw <1,2,5,3>, <3,6,4,7>
+ 2707983172U, // <5,4,3,u>: Cost 3 vsldoi8 <3,u,5,4>, <3,u,5,4>
+ 2708646805U, // <5,4,4,0>: Cost 3 vsldoi8 <4,0,5,4>, <4,0,5,4>
+ 2709310438U, // <5,4,4,1>: Cost 3 vsldoi8 <4,1,5,4>, <4,1,5,4>
+ 3779071030U, // <5,4,4,2>: Cost 4 vsldoi8 <3,4,5,4>, <4,2,5,3>
+ 2710637704U, // <5,4,4,3>: Cost 3 vsldoi8 <4,3,5,4>, <4,3,5,4>
+ 2754235600U, // <5,4,4,4>: Cost 3 vsldoi12 <0,4,1,5>, <4,4,4,4>
+ 1704676570U, // <5,4,4,5>: Cost 2 vsldoi12 <4,4,5,5>, <4,4,5,5>
+ 3779071358U, // <5,4,4,6>: Cost 4 vsldoi8 <3,4,5,4>, <4,6,5,7>
+ 2713292236U, // <5,4,4,7>: Cost 3 vsldoi8 <4,7,5,4>, <4,7,5,4>
+ 1704897781U, // <5,4,4,u>: Cost 2 vsldoi12 <4,4,u,5>, <4,4,u,5>
+ 2626871398U, // <5,4,5,0>: Cost 3 vsldoi4 <1,5,4,5>, LHS
+ 2626872471U, // <5,4,5,1>: Cost 3 vsldoi4 <1,5,4,5>, <1,5,4,5>
+ 2765737230U, // <5,4,5,2>: Cost 3 vsldoi12 <2,3,4,5>, <4,5,2,3>
+ 3700615318U, // <5,4,5,3>: Cost 4 vsldoi4 <1,5,4,5>, <3,0,1,2>
+ 2626874678U, // <5,4,5,4>: Cost 3 vsldoi4 <1,5,4,5>, RHS
+ 1174441270U, // <5,4,5,5>: Cost 2 vmrghw <5,5,5,5>, RHS
+ 1680493878U, // <5,4,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 3385051804U, // <5,4,5,7>: Cost 4 vmrglw <4,u,5,5>, <3,6,4,7>
+ 1680493896U, // <5,4,5,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2248952722U, // <5,4,6,0>: Cost 3 vmrghw <5,6,7,0>, <4,0,5,1>
+ 2302692152U, // <5,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 3382406107U, // <5,4,6,2>: Cost 4 vmrglw <4,4,5,6>, <4,1,4,2>
+ 3700623874U, // <5,4,6,3>: Cost 4 vsldoi4 <1,5,4,6>, <3,4,5,6>
+ 2248953040U, // <5,4,6,4>: Cost 3 vmrghw <5,6,7,0>, <4,4,4,4>
+ 1175211318U, // <5,4,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3376432280U, // <5,4,6,6>: Cost 4 vmrglw <3,4,5,6>, <1,5,4,6>
+ 2729218934U, // <5,4,6,7>: Cost 3 vsldoi8 <7,4,5,4>, <6,7,4,5>
+ 1175211561U, // <5,4,6,u>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 3787035642U, // <5,4,7,0>: Cost 4 vsldoi8 <4,7,5,4>, <7,0,1,2>
+ 3365822501U, // <5,4,7,1>: Cost 4 vmrglw <1,6,5,7>, <0,0,4,1>
+ 3808933085U, // <5,4,7,2>: Cost 4 vsldoi8 <u,4,5,4>, <7,2,u,4>
+ 3784381707U, // <5,4,7,3>: Cost 4 vsldoi8 <4,3,5,4>, <7,3,4,5>
+ 2713294182U, // <5,4,7,4>: Cost 3 vsldoi8 <4,7,5,4>, <7,4,5,6>
+ 2309998286U, // <5,4,7,5>: Cost 3 vmrglw <4,6,5,7>, <2,3,4,5>
+ 3383740111U, // <5,4,7,6>: Cost 4 vmrglw <4,6,5,7>, <2,3,4,6>
+ 3787036239U, // <5,4,7,7>: Cost 4 vsldoi8 <4,7,5,4>, <7,7,4,5>
+ 2731873960U, // <5,4,7,u>: Cost 3 vsldoi8 <7,u,5,4>, <7,u,5,4>
+ 2626895974U, // <5,4,u,0>: Cost 3 vsldoi4 <1,5,4,u>, LHS
+ 2626897050U, // <5,4,u,1>: Cost 3 vsldoi4 <1,5,4,u>, <1,5,4,u>
+ 2644813518U, // <5,4,u,2>: Cost 3 vsldoi4 <4,5,4,u>, <2,3,4,5>
+ 2705327822U, // <5,4,u,3>: Cost 3 vsldoi8 <3,4,5,4>, <2,3,4,5>
+ 2626899254U, // <5,4,u,4>: Cost 3 vsldoi4 <1,5,4,u>, RHS
+ 1707331102U, // <5,4,u,5>: Cost 2 vsldoi12 <4,u,5,5>, <4,u,5,5>
+ 1680494121U, // <5,4,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2737183024U, // <5,4,u,7>: Cost 3 vsldoi8 <u,7,5,4>, <u,7,5,4>
+ 1680494139U, // <5,4,u,u>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 2302642684U, // <5,5,0,0>: Cost 3 vmrglw <3,4,5,0>, <3,4,5,0>
+ 1640218726U, // <5,5,0,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 3376384510U, // <5,5,0,2>: Cost 4 vmrglw <3,4,5,0>, <3,4,5,2>
+ 3376385078U, // <5,5,0,3>: Cost 4 vmrglw <3,4,5,0>, <4,2,5,3>
+ 2754236002U, // <5,5,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <5,0,4,1>
+ 2717942242U, // <5,5,0,5>: Cost 3 vsldoi8 <5,5,5,5>, <0,5,u,5>
+ 2244907106U, // <5,5,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 3376385406U, // <5,5,0,7>: Cost 4 vmrglw <3,4,5,0>, <4,6,5,7>
+ 1640219293U, // <5,5,0,u>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2305969365U, // <5,5,1,0>: Cost 3 vmrglw <4,0,5,1>, <4,4,5,0>
+ 1237536282U, // <5,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2713961366U, // <5,5,1,2>: Cost 3 vsldoi8 <4,u,5,5>, <1,2,3,0>
+ 3766469630U, // <5,5,1,3>: Cost 4 vsldoi8 <1,3,5,5>, <1,3,5,5>
+ 2782326455U, // <5,5,1,4>: Cost 3 vsldoi12 <5,1,4,5>, <5,1,4,5>
+ 2311277786U, // <5,5,1,5>: Cost 3 vmrglw <4,u,5,1>, <4,4,5,5>
+ 2311277058U, // <5,5,1,6>: Cost 3 vmrglw <4,u,5,1>, <3,4,5,6>
+ 3385017587U, // <5,5,1,7>: Cost 4 vmrglw <4,u,5,1>, <1,6,5,7>
+ 1237536282U, // <5,5,1,u>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 3376400892U, // <5,5,2,0>: Cost 4 vmrglw <3,4,5,2>, <3,4,5,0>
+ 3827977963U, // <5,5,2,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,2,1,3>
+ 2302659070U, // <5,5,2,2>: Cost 3 vmrglw <3,4,5,2>, <3,4,5,2>
+ 2765737726U, // <5,5,2,3>: Cost 3 vsldoi12 <2,3,4,5>, <5,2,3,4>
+ 3839479558U, // <5,5,2,4>: Cost 4 vsldoi12 <2,3,4,5>, <5,2,4,3>
+ 2781073167U, // <5,5,2,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,2,5,3>
+ 2713962426U, // <5,5,2,6>: Cost 3 vsldoi8 <4,u,5,5>, <2,6,3,7>
+ 3376401790U, // <5,5,2,7>: Cost 4 vmrglw <3,4,5,2>, <4,6,5,7>
+ 2769055531U, // <5,5,2,u>: Cost 3 vsldoi12 <2,u,4,5>, <5,2,u,4>
+ 2713962646U, // <5,5,3,0>: Cost 3 vsldoi8 <4,u,5,5>, <3,0,1,2>
+ 3765143786U, // <5,5,3,1>: Cost 4 vsldoi8 <1,1,5,5>, <3,1,1,5>
+ 3839479621U, // <5,5,3,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,3,2,3>
+ 2289394603U, // <5,5,3,3>: Cost 3 vmrglw <1,2,5,3>, <1,2,5,3>
+ 2713963010U, // <5,5,3,4>: Cost 3 vsldoi8 <4,u,5,5>, <3,4,5,6>
+ 2313285150U, // <5,5,3,5>: Cost 3 vmrglw <5,2,5,3>, <4,u,5,5>
+ 3363138050U, // <5,5,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,5,6>
+ 3363136755U, // <5,5,3,7>: Cost 4 vmrglw <1,2,5,3>, <1,6,5,7>
+ 2713963294U, // <5,5,3,u>: Cost 3 vsldoi8 <4,u,5,5>, <3,u,1,2>
+ 2713963410U, // <5,5,4,0>: Cost 3 vsldoi8 <4,u,5,5>, <4,0,5,1>
+ 3827978127U, // <5,5,4,1>: Cost 4 vsldoi12 <0,4,1,5>, <5,4,1,5>
+ 3839479704U, // <5,5,4,2>: Cost 4 vsldoi12 <2,3,4,5>, <5,4,2,5>
+ 3376417846U, // <5,5,4,3>: Cost 4 vmrglw <3,4,5,4>, <4,2,5,3>
+ 1637567706U, // <5,5,4,4>: Cost 2 vsldoi8 <4,4,5,5>, <4,4,5,5>
+ 1640222006U, // <5,5,4,5>: Cost 2 vsldoi8 <4,u,5,5>, RHS
+ 2310640998U, // <5,5,4,6>: Cost 3 vmrglw <4,7,5,4>, <7,4,5,6>
+ 3376418174U, // <5,5,4,7>: Cost 4 vmrglw <3,4,5,4>, <4,6,5,7>
+ 1640222238U, // <5,5,4,u>: Cost 2 vsldoi8 <4,u,5,5>, <4,u,5,5>
+ 1577091174U, // <5,5,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 2311310226U, // <5,5,5,1>: Cost 3 vmrglw <4,u,5,5>, <4,0,5,1>
+ 2713964303U, // <5,5,5,2>: Cost 3 vsldoi8 <4,u,5,5>, <5,2,5,3>
+ 2311311119U, // <5,5,5,3>: Cost 3 vmrglw <4,u,5,5>, <5,2,5,3>
+ 1577094454U, // <5,5,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,5,5>: Cost 1 vspltisw1 RHS
+ 2311309826U, // <5,5,5,6>: Cost 3 vmrglw <4,u,5,5>, <3,4,5,6>
+ 2311311447U, // <5,5,5,7>: Cost 3 vmrglw <4,u,5,5>, <5,6,5,7>
+ 296144182U, // <5,5,5,u>: Cost 1 vspltisw1 RHS
+ 2248953460U, // <5,5,6,0>: Cost 3 vmrghw <5,6,7,0>, <5,0,6,1>
+ 2326580114U, // <5,5,6,1>: Cost 3 vmrglw <7,4,5,6>, <4,0,5,1>
+ 2713965050U, // <5,5,6,2>: Cost 3 vsldoi8 <4,u,5,5>, <6,2,7,3>
+ 3700697602U, // <5,5,6,3>: Cost 4 vsldoi4 <1,5,5,6>, <3,4,5,6>
+ 2785644620U, // <5,5,6,4>: Cost 3 vsldoi12 <5,6,4,5>, <5,6,4,5>
+ 2781073495U, // <5,5,6,5>: Cost 3 vsldoi12 <4,u,5,5>, <5,6,5,7>
+ 1228950018U, // <5,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965390U, // <5,5,6,7>: Cost 3 vsldoi8 <4,u,5,5>, <6,7,0,1>
+ 1228950018U, // <5,5,6,u>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713965562U, // <5,5,7,0>: Cost 3 vsldoi8 <4,u,5,5>, <7,0,1,2>
+ 3383741330U, // <5,5,7,1>: Cost 4 vmrglw <4,6,5,7>, <4,0,5,1>
+ 3718620878U, // <5,5,7,2>: Cost 4 vsldoi4 <4,5,5,7>, <2,3,4,5>
+ 3365823403U, // <5,5,7,3>: Cost 4 vmrglw <1,6,5,7>, <1,2,5,3>
+ 2713965926U, // <5,5,7,4>: Cost 3 vsldoi8 <4,u,5,5>, <7,4,5,6>
+ 2717947318U, // <5,5,7,5>: Cost 3 vsldoi8 <5,5,5,5>, <7,5,5,5>
+ 3365825026U, // <5,5,7,6>: Cost 4 vmrglw <1,6,5,7>, <3,4,5,6>
+ 2292081907U, // <5,5,7,7>: Cost 3 vmrglw <1,6,5,7>, <1,6,5,7>
+ 2713966210U, // <5,5,7,u>: Cost 3 vsldoi8 <4,u,5,5>, <7,u,1,2>
+ 1577091174U, // <5,5,u,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1640224558U, // <5,5,u,1>: Cost 2 vsldoi8 <4,u,5,5>, LHS
+ 2713966469U, // <5,5,u,2>: Cost 3 vsldoi8 <4,u,5,5>, <u,2,3,0>
+ 2713966524U, // <5,5,u,3>: Cost 3 vsldoi8 <4,u,5,5>, <u,3,0,1>
+ 1577094454U, // <5,5,u,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,5,u,5>: Cost 1 vspltisw1 RHS
+ 1228950018U, // <5,5,u,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 2713966848U, // <5,5,u,7>: Cost 3 vsldoi8 <4,u,5,5>, <u,7,0,1>
+ 296144182U, // <5,5,u,u>: Cost 1 vspltisw1 RHS
+ 2705342464U, // <5,6,0,0>: Cost 3 vsldoi8 <3,4,5,6>, <0,0,0,0>
+ 1631600742U, // <5,6,0,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3773112493U, // <5,6,0,2>: Cost 4 vsldoi8 <2,4,5,6>, <0,2,1,2>
+ 2705342720U, // <5,6,0,3>: Cost 3 vsldoi8 <3,4,5,6>, <0,3,1,4>
+ 2705342802U, // <5,6,0,4>: Cost 3 vsldoi8 <3,4,5,6>, <0,4,1,5>
+ 3779084708U, // <5,6,0,5>: Cost 4 vsldoi8 <3,4,5,6>, <0,5,1,6>
+ 3779084790U, // <5,6,0,6>: Cost 4 vsldoi8 <3,4,5,6>, <0,6,1,7>
+ 2302643510U, // <5,6,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631601309U, // <5,6,0,u>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 3767141092U, // <5,6,1,0>: Cost 4 vsldoi8 <1,4,5,6>, <1,0,1,2>
+ 2705343284U, // <5,6,1,1>: Cost 3 vsldoi8 <3,4,5,6>, <1,1,1,1>
+ 2705343382U, // <5,6,1,2>: Cost 3 vsldoi8 <3,4,5,6>, <1,2,3,0>
+ 3779085282U, // <5,6,1,3>: Cost 4 vsldoi8 <3,4,5,6>, <1,3,2,4>
+ 2693399632U, // <5,6,1,4>: Cost 3 vsldoi8 <1,4,5,6>, <1,4,5,6>
+ 3767805089U, // <5,6,1,5>: Cost 4 vsldoi8 <1,5,5,6>, <1,5,5,6>
+ 2311279416U, // <5,6,1,6>: Cost 3 vmrglw <4,u,5,1>, <6,6,6,6>
+ 1237536054U, // <5,6,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1237536055U, // <5,6,1,u>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 3773113789U, // <5,6,2,0>: Cost 4 vsldoi8 <2,4,5,6>, <2,0,1,2>
+ 3779085855U, // <5,6,2,1>: Cost 4 vsldoi8 <3,4,5,6>, <2,1,3,1>
+ 2699372136U, // <5,6,2,2>: Cost 3 vsldoi8 <2,4,5,6>, <2,2,2,2>
+ 2705344166U, // <5,6,2,3>: Cost 3 vsldoi8 <3,4,5,6>, <2,3,0,1>
+ 2699372329U, // <5,6,2,4>: Cost 3 vsldoi8 <2,4,5,6>, <2,4,5,6>
+ 2705344360U, // <5,6,2,5>: Cost 3 vsldoi8 <3,4,5,6>, <2,5,3,6>
+ 2705344442U, // <5,6,2,6>: Cost 3 vsldoi8 <3,4,5,6>, <2,6,3,7>
+ 2302659894U, // <5,6,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2702026861U, // <5,6,2,u>: Cost 3 vsldoi8 <2,u,5,6>, <2,u,5,6>
+ 2705344662U, // <5,6,3,0>: Cost 3 vsldoi8 <3,4,5,6>, <3,0,1,2>
+ 3767142661U, // <5,6,3,1>: Cost 4 vsldoi8 <1,4,5,6>, <3,1,4,5>
+ 3773114689U, // <5,6,3,2>: Cost 4 vsldoi8 <2,4,5,6>, <3,2,2,2>
+ 2705344924U, // <5,6,3,3>: Cost 3 vsldoi8 <3,4,5,6>, <3,3,3,3>
+ 1631603202U, // <5,6,3,4>: Cost 2 vsldoi8 <3,4,5,6>, <3,4,5,6>
+ 3842945597U, // <5,6,3,5>: Cost 4 vsldoi12 <2,u,6,5>, <6,3,5,7>
+ 3779086962U, // <5,6,3,6>: Cost 4 vsldoi8 <3,4,5,6>, <3,6,0,1>
+ 2289397046U, // <5,6,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634257734U, // <5,6,3,u>: Cost 2 vsldoi8 <3,u,5,6>, <3,u,5,6>
+ 2644926566U, // <5,6,4,0>: Cost 3 vsldoi4 <4,5,6,4>, LHS
+ 3779087306U, // <5,6,4,1>: Cost 4 vsldoi8 <3,4,5,6>, <4,1,2,3>
+ 2790142577U, // <5,6,4,2>: Cost 3 vsldoi12 <6,4,2,5>, <6,4,2,5>
+ 2644929026U, // <5,6,4,3>: Cost 3 vsldoi4 <4,5,6,4>, <3,4,5,6>
+ 2711317723U, // <5,6,4,4>: Cost 3 vsldoi8 <4,4,5,6>, <4,4,5,6>
+ 1631604022U, // <5,6,4,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 2712644989U, // <5,6,4,6>: Cost 3 vsldoi8 <4,6,5,6>, <4,6,5,6>
+ 2302676278U, // <5,6,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631604265U, // <5,6,4,u>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 3842945708U, // <5,6,5,0>: Cost 4 vsldoi12 <2,u,6,5>, <6,5,0,1>
+ 3767144133U, // <5,6,5,1>: Cost 4 vsldoi8 <1,4,5,6>, <5,1,6,1>
+ 2705346328U, // <5,6,5,2>: Cost 3 vsldoi8 <3,4,5,6>, <5,2,6,3>
+ 3779088207U, // <5,6,5,3>: Cost 4 vsldoi8 <3,4,5,6>, <5,3,3,4>
+ 2717290420U, // <5,6,5,4>: Cost 3 vsldoi8 <5,4,5,6>, <5,4,5,6>
+ 2705346574U, // <5,6,5,5>: Cost 3 vsldoi8 <3,4,5,6>, <5,5,6,6>
+ 2705346596U, // <5,6,5,6>: Cost 3 vsldoi8 <3,4,5,6>, <5,6,0,1>
+ 1237568822U, // <5,6,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 1237568823U, // <5,6,5,u>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 2650914918U, // <5,6,6,0>: Cost 3 vsldoi4 <5,5,6,6>, LHS
+ 3364490949U, // <5,6,6,1>: Cost 4 vmrglw <1,4,5,6>, <5,1,6,1>
+ 2248954362U, // <5,6,6,2>: Cost 3 vmrghw <5,6,7,0>, <6,2,7,3>
+ 2302693144U, // <5,6,6,3>: Cost 3 vmrglw <3,4,5,6>, <5,2,6,3>
+ 2650918198U, // <5,6,6,4>: Cost 3 vsldoi4 <5,5,6,6>, RHS
+ 2650918926U, // <5,6,6,5>: Cost 3 vsldoi4 <5,5,6,6>, <5,5,6,6>
+ 2302693390U, // <5,6,6,6>: Cost 3 vmrglw <3,4,5,6>, <5,5,6,6>
+ 1228950838U, // <5,6,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228950839U, // <5,6,6,u>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 497467494U, // <5,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571210036U, // <5,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571210856U, // <5,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571211414U, // <5,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497470774U, // <5,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571213316U, // <5,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571213818U, // <5,6,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571214956U, // <5,6,7,7>: Cost 2 vsldoi4 RHS, <7,7,7,7>
+ 497473326U, // <5,6,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497475686U, // <5,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631606574U, // <5,6,u,1>: Cost 2 vsldoi8 <3,4,5,6>, LHS
+ 1571219048U, // <5,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571219606U, // <5,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497478967U, // <5,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631606938U, // <5,6,u,5>: Cost 2 vsldoi8 <3,4,5,6>, RHS
+ 1571222010U, // <5,6,u,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1228967222U, // <5,6,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497481518U, // <5,6,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 3768475648U, // <5,7,0,0>: Cost 4 vsldoi8 <1,6,5,7>, <0,0,0,0>
+ 2694733926U, // <5,7,0,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 3718711395U, // <5,7,0,2>: Cost 4 vsldoi4 <4,5,7,0>, <2,u,4,5>
+ 3384349178U, // <5,7,0,3>: Cost 4 vmrglw <4,7,5,0>, <6,2,7,3>
+ 2694734162U, // <5,7,0,4>: Cost 3 vsldoi8 <1,6,5,7>, <0,4,1,5>
+ 3384347884U, // <5,7,0,5>: Cost 4 vmrglw <4,7,5,0>, <4,4,7,5>
+ 3730658026U, // <5,7,0,6>: Cost 4 vsldoi4 <6,5,7,0>, <6,5,7,0>
+ 3718714362U, // <5,7,0,7>: Cost 4 vsldoi4 <4,5,7,0>, <7,0,1,2>
+ 2694734493U, // <5,7,0,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2311278690U, // <5,7,1,0>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,0>
+ 2305970923U, // <5,7,1,1>: Cost 3 vmrglw <4,0,5,1>, <6,5,7,1>
+ 3768476566U, // <5,7,1,2>: Cost 4 vsldoi8 <1,6,5,7>, <1,2,3,0>
+ 2311279098U, // <5,7,1,3>: Cost 3 vmrglw <4,u,5,1>, <6,2,7,3>
+ 2311278694U, // <5,7,1,4>: Cost 3 vmrglw <4,u,5,1>, <5,6,7,4>
+ 3768476783U, // <5,7,1,5>: Cost 4 vsldoi8 <1,6,5,7>, <1,5,0,1>
+ 2694735091U, // <5,7,1,6>: Cost 3 vsldoi8 <1,6,5,7>, <1,6,5,7>
+ 2311279426U, // <5,7,1,7>: Cost 3 vmrglw <4,u,5,1>, <6,6,7,7>
+ 2696062357U, // <5,7,1,u>: Cost 3 vsldoi8 <1,u,5,7>, <1,u,5,7>
+ 3383701602U, // <5,7,2,0>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,0>
+ 3768477219U, // <5,7,2,1>: Cost 4 vsldoi8 <1,6,5,7>, <2,1,3,5>
+ 3768477288U, // <5,7,2,2>: Cost 4 vsldoi8 <1,6,5,7>, <2,2,2,2>
+ 2309960186U, // <5,7,2,3>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3383701606U, // <5,7,2,4>: Cost 4 vmrglw <4,6,5,2>, <5,6,7,4>
+ 3768477545U, // <5,7,2,5>: Cost 4 vsldoi8 <1,6,5,7>, <2,5,3,7>
+ 3766486970U, // <5,7,2,6>: Cost 4 vsldoi8 <1,3,5,7>, <2,6,3,7>
+ 3383702338U, // <5,7,2,7>: Cost 4 vmrglw <4,6,5,2>, <6,6,7,7>
+ 2309960186U, // <5,7,2,u>: Cost 3 vmrglw <4,6,5,2>, <6,2,7,3>
+ 3768477846U, // <5,7,3,0>: Cost 4 vsldoi8 <1,6,5,7>, <3,0,1,2>
+ 3768477975U, // <5,7,3,1>: Cost 4 vsldoi8 <1,6,5,7>, <3,1,6,5>
+ 3786393932U, // <5,7,3,2>: Cost 4 vsldoi8 <4,6,5,7>, <3,2,3,4>
+ 3768478108U, // <5,7,3,3>: Cost 4 vsldoi8 <1,6,5,7>, <3,3,3,3>
+ 2795599115U, // <5,7,3,4>: Cost 3 vsldoi12 <7,3,4,5>, <7,3,4,5>
+ 3385037470U, // <5,7,3,5>: Cost 4 vmrglw <4,u,5,3>, <6,4,7,5>
+ 3780422309U, // <5,7,3,6>: Cost 4 vsldoi8 <3,6,5,7>, <3,6,5,7>
+ 3848107301U, // <5,7,3,7>: Cost 4 vsldoi12 <3,7,4,5>, <7,3,7,4>
+ 2795894063U, // <5,7,3,u>: Cost 3 vsldoi12 <7,3,u,5>, <7,3,u,5>
+ 2795967800U, // <5,7,4,0>: Cost 3 vsldoi12 <7,4,0,5>, <7,4,0,5>
+ 3768478690U, // <5,7,4,1>: Cost 4 vsldoi8 <1,6,5,7>, <4,1,5,0>
+ 3718744163U, // <5,7,4,2>: Cost 4 vsldoi4 <4,5,7,4>, <2,u,4,5>
+ 3784404107U, // <5,7,4,3>: Cost 4 vsldoi8 <4,3,5,7>, <4,3,5,7>
+ 2796262748U, // <5,7,4,4>: Cost 3 vsldoi12 <7,4,4,5>, <7,4,4,5>
+ 2694737206U, // <5,7,4,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2712653182U, // <5,7,4,6>: Cost 3 vsldoi8 <4,6,5,7>, <4,6,5,7>
+ 2713316815U, // <5,7,4,7>: Cost 3 vsldoi8 <4,7,5,7>, <4,7,5,7>
+ 2694737449U, // <5,7,4,u>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2311311458U, // <5,7,5,0>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,0>
+ 3768479433U, // <5,7,5,1>: Cost 4 vsldoi8 <1,6,5,7>, <5,1,6,5>
+ 3768479521U, // <5,7,5,2>: Cost 4 vsldoi8 <1,6,5,7>, <5,2,7,3>
+ 2311311866U, // <5,7,5,3>: Cost 3 vmrglw <4,u,5,5>, <6,2,7,3>
+ 2311311462U, // <5,7,5,4>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,4>
+ 2248185270U, // <5,7,5,5>: Cost 3 vmrghw <5,5,5,5>, <7,5,5,5>
+ 2718625879U, // <5,7,5,6>: Cost 3 vsldoi8 <5,6,5,7>, <5,6,5,7>
+ 2311312194U, // <5,7,5,7>: Cost 3 vmrglw <4,u,5,5>, <6,6,7,7>
+ 2311311466U, // <5,7,5,u>: Cost 3 vmrglw <4,u,5,5>, <5,6,7,u>
+ 2248954874U, // <5,7,6,0>: Cost 3 vmrghw <5,6,7,0>, <7,0,1,2>
+ 3322696778U, // <5,7,6,1>: Cost 4 vmrghw <5,6,7,0>, <7,1,1,1>
+ 2248955028U, // <5,7,6,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2656963074U, // <5,7,6,3>: Cost 3 vsldoi4 <6,5,7,6>, <3,4,5,6>
+ 2248955238U, // <5,7,6,4>: Cost 3 vmrghw <5,6,7,0>, <7,4,5,6>
+ 2248955329U, // <5,7,6,5>: Cost 3 vmrghw <5,6,7,0>, <7,5,6,7>
+ 2656965360U, // <5,7,6,6>: Cost 3 vsldoi4 <6,5,7,6>, <6,5,7,6>
+ 2248955500U, // <5,7,6,7>: Cost 3 vmrghw <5,6,7,0>, <7,7,7,7>
+ 2248955522U, // <5,7,6,u>: Cost 3 vmrghw <5,6,7,0>, <7,u,1,2>
+ 3718766694U, // <5,7,7,0>: Cost 4 vsldoi4 <4,5,7,7>, LHS
+ 3724739827U, // <5,7,7,1>: Cost 4 vsldoi4 <5,5,7,7>, <1,6,5,7>
+ 3718768739U, // <5,7,7,2>: Cost 4 vsldoi4 <4,5,7,7>, <2,u,4,5>
+ 3365826337U, // <5,7,7,3>: Cost 4 vmrglw <1,6,5,7>, <5,2,7,3>
+ 2798253647U, // <5,7,7,4>: Cost 3 vsldoi12 <7,7,4,5>, <7,7,4,5>
+ 3365826258U, // <5,7,7,5>: Cost 4 vmrglw <1,6,5,7>, <5,1,7,5>
+ 3730715377U, // <5,7,7,6>: Cost 4 vsldoi4 <6,5,7,7>, <6,5,7,7>
+ 2310665836U, // <5,7,7,7>: Cost 3 vmrglw <4,7,5,7>, <7,7,7,7>
+ 2798548595U, // <5,7,7,u>: Cost 3 vsldoi12 <7,7,u,5>, <7,7,u,5>
+ 2311336034U, // <5,7,u,0>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,0>
+ 2694739758U, // <5,7,u,1>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2248955028U, // <5,7,u,2>: Cost 3 vmrghw <5,6,7,0>, <7,2,0,3>
+ 2311336442U, // <5,7,u,3>: Cost 3 vmrglw <4,u,5,u>, <6,2,7,3>
+ 2311336038U, // <5,7,u,4>: Cost 3 vmrglw <4,u,5,u>, <5,6,7,4>
+ 2694740122U, // <5,7,u,5>: Cost 3 vsldoi8 <1,6,5,7>, RHS
+ 2656981746U, // <5,7,u,6>: Cost 3 vsldoi4 <6,5,7,u>, <6,5,7,u>
+ 2311336770U, // <5,7,u,7>: Cost 3 vmrglw <4,u,5,u>, <6,6,7,7>
+ 2694740325U, // <5,7,u,u>: Cost 3 vsldoi8 <1,6,5,7>, LHS
+ 2705358848U, // <5,u,0,0>: Cost 3 vsldoi8 <3,4,5,u>, <0,0,0,0>
+ 1631617126U, // <5,u,0,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2310607866U, // <5,u,0,2>: Cost 3 vmrglw <4,7,5,0>, <7,0,1,2>
+ 2302640284U, // <5,u,0,3>: Cost 3 vmrglw <3,4,5,0>, LHS
+ 2754238189U, // <5,u,0,4>: Cost 3 vsldoi12 <0,4,1,5>, <u,0,4,1>
+ 2305296114U, // <5,u,0,5>: Cost 3 vmrglw <3,u,5,0>, <2,3,u,5>
+ 2244907106U, // <5,u,0,6>: Cost 3 vmrghw <5,0,6,1>, <5,6,7,0>
+ 2302643528U, // <5,u,0,7>: Cost 3 vmrglw <3,4,5,0>, RHS
+ 1631617693U, // <5,u,0,u>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 2627133542U, // <5,u,1,0>: Cost 3 vsldoi4 <1,5,u,1>, LHS
+ 1237536282U, // <5,u,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 1680496430U, // <5,u,1,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1237532828U, // <5,u,1,3>: Cost 2 vmrglw <4,u,5,1>, LHS
+ 2693416018U, // <5,u,1,4>: Cost 3 vsldoi8 <1,4,5,u>, <1,4,5,u>
+ 2756892486U, // <5,u,1,5>: Cost 3 vsldoi12 <0,u,1,5>, <u,1,5,0>
+ 2694743284U, // <5,u,1,6>: Cost 3 vsldoi8 <1,6,5,u>, <1,6,5,u>
+ 1237536072U, // <5,u,1,7>: Cost 2 vmrglw <4,u,5,1>, RHS
+ 1680496484U, // <5,u,1,u>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 2311288709U, // <5,u,2,0>: Cost 3 vmrglw <4,u,5,2>, <u,2,3,0>
+ 2245883694U, // <5,u,2,1>: Cost 3 vmrghw <5,2,1,3>, LHS
+ 2699388520U, // <5,u,2,2>: Cost 3 vsldoi8 <2,4,5,u>, <2,2,2,2>
+ 2754238344U, // <5,u,2,3>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,3,3>
+ 2699388715U, // <5,u,2,4>: Cost 3 vsldoi8 <2,4,5,u>, <2,4,5,u>
+ 2757408666U, // <5,u,2,5>: Cost 3 vsldoi12 <0,u,u,5>, <u,2,5,3>
+ 2705360826U, // <5,u,2,6>: Cost 3 vsldoi8 <3,4,5,u>, <2,6,3,7>
+ 2302659912U, // <5,u,2,7>: Cost 3 vmrglw <3,4,5,2>, RHS
+ 2754238389U, // <5,u,2,u>: Cost 3 vsldoi12 <0,4,1,5>, <u,2,u,3>
+ 2754238396U, // <5,u,3,0>: Cost 3 vsldoi12 <0,4,1,5>, <u,3,0,1>
+ 3827980229U, // <5,u,3,1>: Cost 4 vsldoi12 <0,4,1,5>, <u,3,1,1>
+ 2644625102U, // <5,u,3,2>: Cost 3 vsldoi4 <4,5,2,3>, <2,3,4,5>
+ 2289393820U, // <5,u,3,3>: Cost 3 vmrglw <1,2,5,3>, LHS
+ 1631619588U, // <5,u,3,4>: Cost 2 vsldoi8 <3,4,5,u>, <3,4,5,u>
+ 2785056749U, // <5,u,3,5>: Cost 3 vsldoi12 <5,5,5,5>, <u,3,5,5>
+ 3363138077U, // <5,u,3,6>: Cost 4 vmrglw <1,2,5,3>, <3,4,u,6>
+ 2289397064U, // <5,u,3,7>: Cost 3 vmrglw <1,2,5,3>, RHS
+ 1634274120U, // <5,u,3,u>: Cost 2 vsldoi8 <3,u,5,u>, <3,u,5,u>
+ 1634937753U, // <5,u,4,0>: Cost 2 vsldoi8 <4,0,5,u>, <4,0,5,u>
+ 1728272410U, // <5,u,4,1>: Cost 2 vsldoi12 <u,4,1,5>, <u,4,1,5>
+ 2710006843U, // <5,u,4,2>: Cost 3 vsldoi8 <4,2,5,u>, <4,2,5,u>
+ 2765740076U, // <5,u,4,3>: Cost 3 vsldoi12 <2,3,4,5>, <u,4,3,5>
+ 1637592285U, // <5,u,4,4>: Cost 2 vsldoi8 <4,4,5,u>, <4,4,5,u>
+ 1631620406U, // <5,u,4,5>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 2712661375U, // <5,u,4,6>: Cost 3 vsldoi8 <4,6,5,u>, <4,6,5,u>
+ 2302676296U, // <5,u,4,7>: Cost 3 vmrglw <3,4,5,4>, RHS
+ 1631620649U, // <5,u,4,u>: Cost 2 vsldoi8 <3,4,5,u>, RHS
+ 1577091174U, // <5,u,5,0>: Cost 2 vsldoi4 <5,5,5,5>, LHS
+ 1174443822U, // <5,u,5,1>: Cost 2 vmrghw <5,5,5,5>, LHS
+ 2766035058U, // <5,u,5,2>: Cost 3 vsldoi12 <2,3,u,5>, <u,5,2,3>
+ 1237565596U, // <5,u,5,3>: Cost 2 vmrglw <4,u,5,5>, LHS
+ 1577094454U, // <5,u,5,4>: Cost 2 vsldoi4 <5,5,5,5>, RHS
+ 296144182U, // <5,u,5,5>: Cost 1 vspltisw1 RHS
+ 1680496794U, // <5,u,5,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1237568840U, // <5,u,5,7>: Cost 2 vmrglw <4,u,5,5>, RHS
+ 296144182U, // <5,u,5,u>: Cost 1 vspltisw1 RHS
+ 2633146470U, // <5,u,6,0>: Cost 3 vsldoi4 <2,5,u,6>, LHS
+ 1175213870U, // <5,u,6,1>: Cost 2 vmrghw <5,6,7,0>, LHS
+ 2633148309U, // <5,u,6,2>: Cost 3 vsldoi4 <2,5,u,6>, <2,5,u,6>
+ 1228947612U, // <5,u,6,3>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 2633149750U, // <5,u,6,4>: Cost 3 vsldoi4 <2,5,u,6>, RHS
+ 1175214234U, // <5,u,6,5>: Cost 2 vmrghw <5,6,7,0>, RHS
+ 1228950018U, // <5,u,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 1228950856U, // <5,u,6,7>: Cost 2 vmrglw <3,4,5,6>, RHS
+ 1228947617U, // <5,u,6,u>: Cost 2 vmrglw <3,4,5,6>, LHS
+ 497614950U, // <5,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1571357492U, // <5,u,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1571358312U, // <5,u,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1571358870U, // <5,u,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 497618248U, // <5,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1571360772U, // <5,u,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1571361274U, // <5,u,7,6>: Cost 2 vsldoi4 RHS, <6,2,7,3>
+ 1571361786U, // <5,u,7,7>: Cost 2 vsldoi4 RHS, <7,0,1,2>
+ 497620782U, // <5,u,7,u>: Cost 1 vsldoi4 RHS, LHS
+ 497623142U, // <5,u,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631622958U, // <5,u,u,1>: Cost 2 vsldoi8 <3,4,5,u>, LHS
+ 1680496997U, // <5,u,u,2>: Cost 2 vsldoi12 <0,4,1,5>, LHS
+ 1228963996U, // <5,u,u,3>: Cost 2 vmrglw <3,4,5,u>, LHS
+ 497626441U, // <5,u,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 296144182U, // <5,u,u,5>: Cost 1 vspltisw1 RHS
+ 1680497037U, // <5,u,u,6>: Cost 2 vsldoi12 <0,4,1,5>, RHS
+ 1228967240U, // <5,u,u,7>: Cost 2 vmrglw <3,4,5,u>, RHS
+ 497628974U, // <5,u,u,u>: Cost 1 vsldoi4 RHS, LHS
+ 2772451328U, // <6,0,0,0>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,0,0>
+ 2772451338U, // <6,0,0,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,1,1>
+ 3771146417U, // <6,0,0,2>: Cost 4 vsldoi8 <2,1,6,0>, <0,2,1,6>
+ 3383095739U, // <6,0,0,3>: Cost 4 vmrglw <4,5,6,0>, <6,2,0,3>
+ 3846193189U, // <6,0,0,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,0,4,1>
+ 3724832803U, // <6,0,0,5>: Cost 4 vsldoi4 <5,6,0,0>, <5,6,0,0>
+ 3383095985U, // <6,0,0,6>: Cost 4 vmrglw <4,5,6,0>, <6,5,0,6>
+ 3383096067U, // <6,0,0,7>: Cost 4 vmrglw <4,5,6,0>, <6,6,0,7>
+ 2772451401U, // <6,0,0,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,0,u,1>
+ 2651095142U, // <6,0,1,0>: Cost 3 vsldoi4 <5,6,0,1>, LHS
+ 2251612262U, // <6,0,1,1>: Cost 3 vmrghw <6,1,7,1>, LHS
+ 1698709606U, // <6,0,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2651097602U, // <6,0,1,3>: Cost 3 vsldoi4 <5,6,0,1>, <3,4,5,6>
+ 2651098422U, // <6,0,1,4>: Cost 3 vsldoi4 <5,6,0,1>, RHS
+ 2651099172U, // <6,0,1,5>: Cost 3 vsldoi4 <5,6,0,1>, <5,6,0,1>
+ 2657071869U, // <6,0,1,6>: Cost 3 vsldoi4 <6,6,0,1>, <6,6,0,1>
+ 3724841978U, // <6,0,1,7>: Cost 4 vsldoi4 <5,6,0,1>, <7,0,1,2>
+ 1698709660U, // <6,0,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2252292096U, // <6,0,2,0>: Cost 3 vmrghw <6,2,7,3>, <0,0,0,0>
+ 1178550374U, // <6,0,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3826655418U, // <6,0,2,2>: Cost 4 vsldoi12 <0,2,1,6>, <0,2,2,6>
+ 3777783485U, // <6,0,2,3>: Cost 4 vsldoi8 <3,2,6,0>, <2,3,2,6>
+ 2252292434U, // <6,0,2,4>: Cost 3 vmrghw <6,2,7,3>, <0,4,1,5>
+ 3785746280U, // <6,0,2,5>: Cost 4 vsldoi8 <4,5,6,0>, <2,5,3,6>
+ 2252292593U, // <6,0,2,6>: Cost 3 vmrghw <6,2,7,3>, <0,6,1,2>
+ 3736794583U, // <6,0,2,7>: Cost 4 vsldoi4 <7,6,0,2>, <7,6,0,2>
+ 1178550941U, // <6,0,2,u>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 3375153152U, // <6,0,3,0>: Cost 4 vmrglw <3,2,6,3>, <0,0,0,0>
+ 2772451584U, // <6,0,3,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,1,4>
+ 3777784163U, // <6,0,3,2>: Cost 4 vsldoi8 <3,2,6,0>, <3,2,6,0>
+ 3846193426U, // <6,0,3,3>: Cost 4 vsldoi12 <3,4,5,6>, <0,3,3,4>
+ 2712005122U, // <6,0,3,4>: Cost 3 vsldoi8 <4,5,6,0>, <3,4,5,6>
+ 3724857382U, // <6,0,3,5>: Cost 4 vsldoi4 <5,6,0,3>, <5,6,0,3>
+ 3802335864U, // <6,0,3,6>: Cost 4 vsldoi8 <7,3,6,0>, <3,6,0,7>
+ 3801672410U, // <6,0,3,7>: Cost 4 vsldoi8 <7,2,6,0>, <3,7,2,6>
+ 2772451647U, // <6,0,3,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,3,u,4>
+ 3383123968U, // <6,0,4,0>: Cost 4 vmrglw <4,5,6,4>, <0,0,0,0>
+ 2772451666U, // <6,0,4,1>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,1,5>
+ 3773803577U, // <6,0,4,2>: Cost 4 vsldoi8 <2,5,6,0>, <4,2,5,6>
+ 3724864002U, // <6,0,4,3>: Cost 4 vsldoi4 <5,6,0,4>, <3,4,5,6>
+ 3846193517U, // <6,0,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <0,4,4,5>
+ 2712005935U, // <6,0,4,5>: Cost 3 vsldoi8 <4,5,6,0>, <4,5,6,0>
+ 3327009265U, // <6,0,4,6>: Cost 4 vmrghw <6,4,2,5>, <0,6,1,2>
+ 3383126648U, // <6,0,4,7>: Cost 5 vmrglw <4,5,6,4>, <3,6,0,7>
+ 2772451729U, // <6,0,4,u>: Cost 3 vsldoi12 <3,4,5,6>, <0,4,u,5>
+ 3373178880U, // <6,0,5,0>: Cost 4 vmrglw <2,u,6,5>, <0,0,0,0>
+ 2254266470U, // <6,0,5,1>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 3785748248U, // <6,0,5,2>: Cost 4 vsldoi8 <4,5,6,0>, <5,2,6,3>
+ 3790393190U, // <6,0,5,3>: Cost 4 vsldoi8 <5,3,6,0>, <5,3,6,0>
+ 3328000338U, // <6,0,5,4>: Cost 4 vmrghw <6,5,7,0>, <0,4,1,5>
+ 3785748494U, // <6,0,5,5>: Cost 4 vsldoi8 <4,5,6,0>, <5,5,6,6>
+ 3785748516U, // <6,0,5,6>: Cost 4 vsldoi8 <4,5,6,0>, <5,6,0,1>
+ 3379153528U, // <6,0,5,7>: Cost 4 vmrglw <3,u,6,5>, <3,6,0,7>
+ 2254267037U, // <6,0,5,u>: Cost 3 vmrghw <6,5,7,1>, LHS
+ 2254897152U, // <6,0,6,0>: Cost 3 vmrghw <6,6,6,6>, <0,0,0,0>
+ 1181155430U, // <6,0,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 3785748923U, // <6,0,6,2>: Cost 4 vsldoi8 <4,5,6,0>, <6,2,0,3>
+ 3785749042U, // <6,0,6,3>: Cost 4 vsldoi8 <4,5,6,0>, <6,3,4,5>
+ 2254897490U, // <6,0,6,4>: Cost 3 vmrghw <6,6,6,6>, <0,4,1,5>
+ 3785749169U, // <6,0,6,5>: Cost 4 vsldoi8 <4,5,6,0>, <6,5,0,6>
+ 2724614962U, // <6,0,6,6>: Cost 3 vsldoi8 <6,6,6,0>, <6,6,6,0>
+ 3787739982U, // <6,0,6,7>: Cost 4 vsldoi8 <4,u,6,0>, <6,7,0,1>
+ 1181155997U, // <6,0,6,u>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1235664896U, // <6,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235666598U, // <6,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 3712943720U, // <6,0,7,2>: Cost 4 vsldoi4 <3,6,0,7>, <2,2,2,2>
+ 2639202936U, // <6,0,7,3>: Cost 3 vsldoi4 <3,6,0,7>, <3,6,0,7>
+ 2639203638U, // <6,0,7,4>: Cost 3 vsldoi4 <3,6,0,7>, RHS
+ 2309409236U, // <6,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 3712946517U, // <6,0,7,6>: Cost 4 vsldoi4 <3,6,0,7>, <6,0,7,0>
+ 2309409400U, // <6,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235666605U, // <6,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 1235673088U, // <6,0,u,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235674790U, // <6,0,u,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 1698710173U, // <6,0,u,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2639211129U, // <6,0,u,3>: Cost 3 vsldoi4 <3,6,0,u>, <3,6,0,u>
+ 2639211830U, // <6,0,u,4>: Cost 3 vsldoi4 <3,6,0,u>, RHS
+ 2712008858U, // <6,0,u,5>: Cost 3 vsldoi8 <4,5,6,0>, RHS
+ 2657129220U, // <6,0,u,6>: Cost 3 vsldoi4 <6,6,0,u>, <6,6,0,u>
+ 2309417592U, // <6,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1698710227U, // <6,0,u,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 3775799296U, // <6,1,0,0>: Cost 4 vsldoi8 <2,u,6,1>, <0,0,0,0>
+ 2702057574U, // <6,1,0,1>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3373143763U, // <6,1,0,2>: Cost 4 vmrglw <2,u,6,0>, <u,0,1,2>
+ 3695045122U, // <6,1,0,3>: Cost 4 vsldoi4 <0,6,1,0>, <3,4,5,6>
+ 3775799634U, // <6,1,0,4>: Cost 4 vsldoi8 <2,u,6,1>, <0,4,1,5>
+ 3383091538U, // <6,1,0,5>: Cost 4 vmrglw <4,5,6,0>, <0,4,1,5>
+ 3368493233U, // <6,1,0,6>: Cost 4 vmrglw <2,1,6,0>, <0,2,1,6>
+ 3362522319U, // <6,1,0,7>: Cost 5 vmrglw <1,1,6,0>, <1,6,1,7>
+ 2702058141U, // <6,1,0,u>: Cost 3 vsldoi8 <2,u,6,1>, LHS
+ 3834250027U, // <6,1,1,0>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,0,1>
+ 2772452148U, // <6,1,1,1>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 3832038210U, // <6,1,1,2>: Cost 4 vsldoi12 <1,1,2,6>, <1,1,2,6>
+ 3373150660U, // <6,1,1,3>: Cost 4 vmrglw <2,u,6,1>, <6,2,1,3>
+ 3834250067U, // <6,1,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <1,1,4,5>
+ 3373146450U, // <6,1,1,5>: Cost 4 vmrglw <2,u,6,1>, <0,4,1,5>
+ 3826656102U, // <6,1,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,1,6,6>
+ 3362530511U, // <6,1,1,7>: Cost 4 vmrglw <1,1,6,1>, <1,6,1,7>
+ 2772452148U, // <6,1,1,u>: Cost 3 vsldoi12 <3,4,5,6>, <1,1,1,1>
+ 2669092966U, // <6,1,2,0>: Cost 3 vsldoi4 <u,6,1,2>, LHS
+ 2252292916U, // <6,1,2,1>: Cost 3 vmrghw <6,2,7,3>, <1,1,1,1>
+ 2252293014U, // <6,1,2,2>: Cost 3 vmrghw <6,2,7,3>, <1,2,3,0>
+ 2772452246U, // <6,1,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,2,3,0>
+ 2669096246U, // <6,1,2,4>: Cost 3 vsldoi4 <u,6,1,2>, RHS
+ 3846194091U, // <6,1,2,5>: Cost 4 vsldoi12 <3,4,5,6>, <1,2,5,3>
+ 2702059450U, // <6,1,2,6>: Cost 3 vsldoi8 <2,u,6,1>, <2,6,3,7>
+ 3870081978U, // <6,1,2,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,2,7,0>
+ 2702059633U, // <6,1,2,u>: Cost 3 vsldoi8 <2,u,6,1>, <2,u,6,1>
+ 3775801494U, // <6,1,3,0>: Cost 4 vsldoi8 <2,u,6,1>, <3,0,1,2>
+ 3777128723U, // <6,1,3,1>: Cost 4 vsldoi8 <3,1,6,1>, <3,1,6,1>
+ 3775801702U, // <6,1,3,2>: Cost 4 vsldoi8 <2,u,6,1>, <3,2,6,3>
+ 3775801756U, // <6,1,3,3>: Cost 4 vsldoi8 <2,u,6,1>, <3,3,3,3>
+ 3775801858U, // <6,1,3,4>: Cost 4 vsldoi8 <2,u,6,1>, <3,4,5,6>
+ 3375153490U, // <6,1,3,5>: Cost 4 vmrglw <3,2,6,3>, <0,4,1,5>
+ 3826656265U, // <6,1,3,6>: Cost 4 vsldoi12 <0,2,1,6>, <1,3,6,7>
+ 3775802051U, // <6,1,3,7>: Cost 4 vsldoi8 <2,u,6,1>, <3,7,0,1>
+ 3775802142U, // <6,1,3,u>: Cost 4 vsldoi8 <2,u,6,1>, <3,u,1,2>
+ 3846194206U, // <6,1,4,0>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,0,1>
+ 3846194219U, // <6,1,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,1,5>
+ 3846194228U, // <6,1,4,2>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,2,5>
+ 3846194236U, // <6,1,4,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,3,4>
+ 3846194246U, // <6,1,4,4>: Cost 4 vsldoi12 <3,4,5,6>, <1,4,4,5>
+ 2760508496U, // <6,1,4,5>: Cost 3 vsldoi12 <1,4,5,6>, <1,4,5,6>
+ 3368526001U, // <6,1,4,6>: Cost 4 vmrglw <2,1,6,4>, <0,2,1,6>
+ 3870082144U, // <6,1,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <1,4,7,4>
+ 2760729707U, // <6,1,4,u>: Cost 3 vsldoi12 <1,4,u,6>, <1,4,u,6>
+ 2714668660U, // <6,1,5,0>: Cost 3 vsldoi8 <5,0,6,1>, <5,0,6,1>
+ 3834619005U, // <6,1,5,1>: Cost 4 vsldoi12 <1,5,1,6>, <1,5,1,6>
+ 3834692742U, // <6,1,5,2>: Cost 4 vsldoi12 <1,5,2,6>, <1,5,2,6>
+ 3846194317U, // <6,1,5,3>: Cost 4 vsldoi12 <3,4,5,6>, <1,5,3,4>
+ 3834840216U, // <6,1,5,4>: Cost 4 vsldoi12 <1,5,4,6>, <1,5,4,6>
+ 3834913953U, // <6,1,5,5>: Cost 4 vsldoi12 <1,5,5,6>, <1,5,5,6>
+ 2719977570U, // <6,1,5,6>: Cost 3 vsldoi8 <5,u,6,1>, <5,6,7,0>
+ 3367208143U, // <6,1,5,7>: Cost 4 vmrglw <1,u,6,5>, <1,6,1,7>
+ 2719977724U, // <6,1,5,u>: Cost 3 vsldoi8 <5,u,6,1>, <5,u,6,1>
+ 2669125734U, // <6,1,6,0>: Cost 3 vsldoi4 <u,6,1,6>, LHS
+ 2254897972U, // <6,1,6,1>: Cost 3 vmrghw <6,6,6,6>, <1,1,1,1>
+ 2254898070U, // <6,1,6,2>: Cost 3 vmrghw <6,6,6,6>, <1,2,3,0>
+ 3775803929U, // <6,1,6,3>: Cost 4 vsldoi8 <2,u,6,1>, <6,3,1,7>
+ 2669129014U, // <6,1,6,4>: Cost 3 vsldoi4 <u,6,1,6>, RHS
+ 2322006354U, // <6,1,6,5>: Cost 3 vmrglw <6,6,6,6>, <0,4,1,5>
+ 2725950264U, // <6,1,6,6>: Cost 3 vsldoi8 <6,u,6,1>, <6,6,6,6>
+ 3793720142U, // <6,1,6,7>: Cost 4 vsldoi8 <5,u,6,1>, <6,7,0,1>
+ 2254898556U, // <6,1,6,u>: Cost 3 vmrghw <6,6,6,6>, <1,u,3,0>
+ 2627330150U, // <6,1,7,0>: Cost 3 vsldoi4 <1,6,1,7>, LHS
+ 1235664906U, // <6,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235667094U, // <6,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309406894U, // <6,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2627333430U, // <6,1,7,4>: Cost 3 vsldoi4 <1,6,1,7>, RHS
+ 1235665234U, // <6,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309406897U, // <6,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309407222U, // <6,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235664913U, // <6,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 2627338342U, // <6,1,u,0>: Cost 3 vsldoi4 <1,6,1,u>, LHS
+ 1235673098U, // <6,1,u,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235675286U, // <6,1,u,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2772452732U, // <6,1,u,3>: Cost 3 vsldoi12 <3,4,5,6>, <1,u,3,0>
+ 2627341622U, // <6,1,u,4>: Cost 3 vsldoi4 <1,6,1,u>, RHS
+ 1235673426U, // <6,1,u,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309415089U, // <6,1,u,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309415414U, // <6,1,u,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235673105U, // <6,1,u,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 3324683725U, // <6,2,0,0>: Cost 4 vmrghw <6,0,7,0>, <2,0,3,0>
+ 2725290086U, // <6,2,0,1>: Cost 3 vsldoi8 <6,7,6,2>, LHS
+ 3771162801U, // <6,2,0,2>: Cost 4 vsldoi8 <2,1,6,2>, <0,2,1,6>
+ 2309349478U, // <6,2,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3730951478U, // <6,2,0,4>: Cost 4 vsldoi4 <6,6,2,0>, RHS
+ 3840738784U, // <6,2,0,5>: Cost 4 vsldoi12 <2,5,3,6>, <2,0,5,1>
+ 3842655721U, // <6,2,0,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,0,6,1>
+ 3736925671U, // <6,2,0,7>: Cost 4 vsldoi4 <7,6,2,0>, <7,6,2,0>
+ 2309349483U, // <6,2,0,u>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 3367840468U, // <6,2,1,0>: Cost 4 vmrglw <2,0,6,1>, <3,7,2,0>
+ 3325355551U, // <6,2,1,1>: Cost 4 vmrghw <6,1,7,1>, <2,1,3,1>
+ 3373147752U, // <6,2,1,2>: Cost 4 vmrglw <2,u,6,1>, <2,2,2,2>
+ 2299404390U, // <6,2,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 3701099830U, // <6,2,1,4>: Cost 5 vsldoi4 <1,6,2,1>, RHS
+ 3767846054U, // <6,2,1,5>: Cost 4 vsldoi8 <1,5,6,2>, <1,5,6,2>
+ 3826656825U, // <6,2,1,6>: Cost 4 vsldoi12 <0,2,1,6>, <2,1,6,0>
+ 3373147838U, // <6,2,1,7>: Cost 5 vmrglw <2,u,6,1>, <2,3,2,7>
+ 2299404395U, // <6,2,1,u>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2657222758U, // <6,2,2,0>: Cost 3 vsldoi4 <6,6,2,2>, LHS
+ 3771164219U, // <6,2,2,1>: Cost 4 vsldoi8 <2,1,6,2>, <2,1,6,2>
+ 2766481000U, // <6,2,2,2>: Cost 3 vsldoi12 <2,4,5,6>, <2,2,2,2>
+ 2772452978U, // <6,2,2,3>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,3,3>
+ 2657226038U, // <6,2,2,4>: Cost 3 vsldoi4 <6,6,2,2>, RHS
+ 3790407528U, // <6,2,2,5>: Cost 4 vsldoi8 <5,3,6,2>, <2,5,3,6>
+ 2252294074U, // <6,2,2,6>: Cost 3 vmrghw <6,2,7,3>, <2,6,3,7>
+ 2252294148U, // <6,2,2,7>: Cost 3 vmrghw <6,2,7,3>, <2,7,3,0>
+ 2772453023U, // <6,2,2,u>: Cost 3 vsldoi12 <3,4,5,6>, <2,2,u,3>
+ 2772453030U, // <6,2,3,0>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,0,1>
+ 3834250930U, // <6,2,3,1>: Cost 4 vsldoi12 <1,4,5,6>, <2,3,1,4>
+ 2765596349U, // <6,2,3,2>: Cost 3 vsldoi12 <2,3,2,6>, <2,3,2,6>
+ 2301411430U, // <6,2,3,3>: Cost 3 vmrglw <3,2,6,3>, LHS
+ 2772453070U, // <6,2,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <2,3,4,5>
+ 2765817560U, // <6,2,3,5>: Cost 3 vsldoi12 <2,3,5,6>, <2,3,5,6>
+ 2252933050U, // <6,2,3,6>: Cost 3 vmrghw <6,3,7,0>, <2,6,3,7>
+ 2796340968U, // <6,2,3,7>: Cost 3 vsldoi12 <7,4,5,6>, <2,3,7,4>
+ 2766038771U, // <6,2,3,u>: Cost 3 vsldoi12 <2,3,u,6>, <2,3,u,6>
+ 3725008998U, // <6,2,4,0>: Cost 4 vsldoi4 <5,6,2,4>, LHS
+ 3368530217U, // <6,2,4,1>: Cost 5 vmrglw <2,1,6,4>, <6,0,2,1>
+ 3840222989U, // <6,2,4,2>: Cost 4 vsldoi12 <2,4,5,6>, <2,4,2,5>
+ 2309382246U, // <6,2,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 3725012278U, // <6,2,4,4>: Cost 4 vsldoi4 <5,6,2,4>, RHS
+ 2766481193U, // <6,2,4,5>: Cost 3 vsldoi12 <2,4,5,6>, <2,4,5,6>
+ 3842656049U, // <6,2,4,6>: Cost 4 vsldoi12 <2,u,2,6>, <2,4,6,5>
+ 3327010820U, // <6,2,4,7>: Cost 4 vmrghw <6,4,2,5>, <2,7,3,0>
+ 2766702404U, // <6,2,4,u>: Cost 3 vsldoi12 <2,4,u,6>, <2,4,u,6>
+ 3713073254U, // <6,2,5,0>: Cost 4 vsldoi4 <3,6,2,5>, LHS
+ 3789082310U, // <6,2,5,1>: Cost 4 vsldoi8 <5,1,6,2>, <5,1,6,2>
+ 3840665439U, // <6,2,5,2>: Cost 4 vsldoi12 <2,5,2,6>, <2,5,2,6>
+ 2766997352U, // <6,2,5,3>: Cost 3 vsldoi12 <2,5,3,6>, <2,5,3,6>
+ 3713076534U, // <6,2,5,4>: Cost 4 vsldoi4 <3,6,2,5>, RHS
+ 3791736842U, // <6,2,5,5>: Cost 4 vsldoi8 <5,5,6,2>, <5,5,6,2>
+ 3373180605U, // <6,2,5,6>: Cost 4 vmrglw <2,u,6,5>, <2,3,2,6>
+ 3793064108U, // <6,2,5,7>: Cost 4 vsldoi8 <5,7,6,2>, <5,7,6,2>
+ 2767366037U, // <6,2,5,u>: Cost 3 vsldoi12 <2,5,u,6>, <2,5,u,6>
+ 3701137510U, // <6,2,6,0>: Cost 4 vsldoi4 <1,6,2,6>, LHS
+ 3701138647U, // <6,2,6,1>: Cost 4 vsldoi4 <1,6,2,6>, <1,6,2,6>
+ 2254898792U, // <6,2,6,2>: Cost 3 vmrghw <6,6,6,6>, <2,2,2,2>
+ 1248264294U, // <6,2,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 3701140790U, // <6,2,6,4>: Cost 4 vsldoi4 <1,6,2,6>, RHS
+ 3725029435U, // <6,2,6,5>: Cost 4 vsldoi4 <5,6,2,6>, <5,6,2,6>
+ 2254899130U, // <6,2,6,6>: Cost 3 vmrghw <6,6,6,6>, <2,6,3,7>
+ 2725294981U, // <6,2,6,7>: Cost 3 vsldoi8 <6,7,6,2>, <6,7,6,2>
+ 1248264299U, // <6,2,6,u>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 2633375846U, // <6,2,7,0>: Cost 3 vsldoi4 <2,6,2,7>, LHS
+ 2309407468U, // <6,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235666536U, // <6,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161923174U, // <6,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2633379126U, // <6,2,7,4>: Cost 3 vsldoi4 <2,6,2,7>, RHS
+ 2309407796U, // <6,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309408445U, // <6,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309407960U, // <6,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161923179U, // <6,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 2633384038U, // <6,2,u,0>: Cost 3 vsldoi4 <2,6,2,u>, LHS
+ 2309415660U, // <6,2,u,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235674728U, // <6,2,u,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 161931366U, // <6,2,u,3>: Cost 1 vmrglw RHS, LHS
+ 2633387318U, // <6,2,u,4>: Cost 3 vsldoi4 <2,6,2,u>, RHS
+ 2769135725U, // <6,2,u,5>: Cost 3 vsldoi12 <2,u,5,6>, <2,u,5,6>
+ 2309416637U, // <6,2,u,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309416152U, // <6,2,u,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 161931371U, // <6,2,u,u>: Cost 1 vmrglw RHS, LHS
+ 3777806336U, // <6,3,0,0>: Cost 4 vsldoi8 <3,2,6,3>, <0,0,0,0>
+ 2704064614U, // <6,3,0,1>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 3765862577U, // <6,3,0,2>: Cost 4 vsldoi8 <1,2,6,3>, <0,2,1,6>
+ 3843393708U, // <6,3,0,3>: Cost 4 vsldoi12 <3,0,3,6>, <3,0,3,6>
+ 2250516994U, // <6,3,0,4>: Cost 3 vmrghw <6,0,1,2>, <3,4,5,6>
+ 3725054014U, // <6,3,0,5>: Cost 4 vsldoi4 <5,6,3,0>, <5,6,3,0>
+ 3383093096U, // <6,3,0,6>: Cost 4 vmrglw <4,5,6,0>, <2,5,3,6>
+ 3368495034U, // <6,3,0,7>: Cost 4 vmrglw <2,1,6,0>, <2,6,3,7>
+ 2704065181U, // <6,3,0,u>: Cost 3 vsldoi8 <3,2,6,3>, LHS
+ 2251622550U, // <6,3,1,0>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 3777807156U, // <6,3,1,1>: Cost 4 vsldoi8 <3,2,6,3>, <1,1,1,1>
+ 3765863348U, // <6,3,1,2>: Cost 4 vsldoi8 <1,2,6,3>, <1,2,6,3>
+ 3373147762U, // <6,3,1,3>: Cost 4 vmrglw <2,u,6,1>, <2,2,3,3>
+ 3834251525U, // <6,3,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <3,1,4,5>
+ 3373147683U, // <6,3,1,5>: Cost 5 vmrglw <2,u,6,1>, <2,1,3,5>
+ 3391727545U, // <6,3,1,6>: Cost 4 vmrglw <6,0,6,1>, <2,6,3,6>
+ 2299406266U, // <6,3,1,7>: Cost 3 vmrglw <2,u,6,1>, <2,6,3,7>
+ 2251622550U, // <6,3,1,u>: Cost 3 vmrghw <6,1,7,2>, <3,0,1,2>
+ 2252294294U, // <6,3,2,0>: Cost 3 vmrghw <6,2,7,3>, <3,0,1,2>
+ 3326036198U, // <6,3,2,1>: Cost 4 vmrghw <6,2,7,3>, <3,1,1,1>
+ 3771836045U, // <6,3,2,2>: Cost 4 vsldoi8 <2,2,6,3>, <2,2,6,3>
+ 2252294556U, // <6,3,2,3>: Cost 3 vmrghw <6,2,7,3>, <3,3,3,3>
+ 2252294658U, // <6,3,2,4>: Cost 3 vmrghw <6,2,7,3>, <3,4,5,6>
+ 3840739677U, // <6,3,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <3,2,5,3>
+ 2704066490U, // <6,3,2,6>: Cost 3 vsldoi8 <3,2,6,3>, <2,6,3,7>
+ 3368511418U, // <6,3,2,7>: Cost 4 vmrglw <2,1,6,2>, <2,6,3,7>
+ 2252294942U, // <6,3,2,u>: Cost 3 vmrghw <6,2,7,3>, <3,u,1,2>
+ 3707158630U, // <6,3,3,0>: Cost 4 vsldoi4 <2,6,3,3>, LHS
+ 3765864692U, // <6,3,3,1>: Cost 5 vsldoi8 <1,2,6,3>, <3,1,2,6>
+ 2704066918U, // <6,3,3,2>: Cost 3 vsldoi8 <3,2,6,3>, <3,2,6,3>
+ 2772453788U, // <6,3,3,3>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,3,3>
+ 2772453799U, // <6,3,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <3,3,4,5>
+ 3789752888U, // <6,3,3,5>: Cost 4 vsldoi8 <5,2,6,3>, <3,5,2,6>
+ 3840739770U, // <6,3,3,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,3,6,6>
+ 2301413306U, // <6,3,3,7>: Cost 3 vmrglw <3,2,6,3>, <2,6,3,7>
+ 2775108043U, // <6,3,3,u>: Cost 3 vsldoi12 <3,u,5,6>, <3,3,u,5>
+ 2651340902U, // <6,3,4,0>: Cost 3 vsldoi4 <5,6,3,4>, LHS
+ 3846195674U, // <6,3,4,1>: Cost 4 vsldoi12 <3,4,5,6>, <3,4,1,2>
+ 3845974503U, // <6,3,4,2>: Cost 4 vsldoi12 <3,4,2,6>, <3,4,2,6>
+ 2651343362U, // <6,3,4,3>: Cost 3 vsldoi4 <5,6,3,4>, <3,4,5,6>
+ 2651344182U, // <6,3,4,4>: Cost 3 vsldoi4 <5,6,3,4>, RHS
+ 1698712066U, // <6,3,4,5>: Cost 2 vsldoi12 <3,4,5,6>, <3,4,5,6>
+ 3383125864U, // <6,3,4,6>: Cost 4 vmrglw <4,5,6,4>, <2,5,3,6>
+ 3368527802U, // <6,3,4,7>: Cost 4 vmrglw <2,1,6,4>, <2,6,3,7>
+ 1698933277U, // <6,3,4,u>: Cost 2 vsldoi12 <3,4,u,6>, <3,4,u,6>
+ 3373179798U, // <6,3,5,0>: Cost 4 vmrglw <2,u,6,5>, <1,2,3,0>
+ 3707176179U, // <6,3,5,1>: Cost 5 vsldoi4 <2,6,3,5>, <1,6,5,7>
+ 2716012312U, // <6,3,5,2>: Cost 3 vsldoi8 <5,2,6,3>, <5,2,6,3>
+ 3373180530U, // <6,3,5,3>: Cost 4 vmrglw <2,u,6,5>, <2,2,3,3>
+ 2254309890U, // <6,3,5,4>: Cost 3 vmrghw <6,5,7,6>, <3,4,5,6>
+ 3785773070U, // <6,3,5,5>: Cost 4 vsldoi8 <4,5,6,3>, <5,5,6,6>
+ 3840739932U, // <6,3,5,6>: Cost 4 vsldoi12 <2,5,3,6>, <3,5,6,6>
+ 2299439034U, // <6,3,5,7>: Cost 3 vmrglw <2,u,6,5>, <2,6,3,7>
+ 2719994110U, // <6,3,5,u>: Cost 3 vsldoi8 <5,u,6,3>, <5,u,6,3>
+ 2254899350U, // <6,3,6,0>: Cost 3 vmrghw <6,6,6,6>, <3,0,1,2>
+ 3328641254U, // <6,3,6,1>: Cost 4 vmrghw <6,6,6,6>, <3,1,1,1>
+ 2633443257U, // <6,3,6,2>: Cost 3 vsldoi4 <2,6,3,6>, <2,6,3,6>
+ 2254899612U, // <6,3,6,3>: Cost 3 vmrghw <6,6,6,6>, <3,3,3,3>
+ 2254899714U, // <6,3,6,4>: Cost 3 vmrghw <6,6,6,6>, <3,4,5,6>
+ 3785773772U, // <6,3,6,5>: Cost 4 vsldoi8 <4,5,6,3>, <6,5,3,6>
+ 2725966648U, // <6,3,6,6>: Cost 3 vsldoi8 <6,u,6,3>, <6,6,6,6>
+ 2322007994U, // <6,3,6,7>: Cost 3 vmrglw <6,6,6,6>, <2,6,3,7>
+ 2254899998U, // <6,3,6,u>: Cost 3 vmrghw <6,6,6,6>, <3,u,1,2>
+ 1559707750U, // <6,3,7,0>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 2633450292U, // <6,3,7,1>: Cost 3 vsldoi4 <2,6,3,7>, <1,1,1,1>
+ 1559709626U, // <6,3,7,2>: Cost 2 vsldoi4 <2,6,3,7>, <2,6,3,7>
+ 1235666546U, // <6,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559711030U, // <6,3,7,4>: Cost 2 vsldoi4 <2,6,3,7>, RHS
+ 2309408291U, // <6,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2633454152U, // <6,3,7,6>: Cost 3 vsldoi4 <2,6,3,7>, <6,3,7,0>
+ 1235666874U, // <6,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559713582U, // <6,3,7,u>: Cost 2 vsldoi4 <2,6,3,7>, LHS
+ 1559715942U, // <6,3,u,0>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 2633458484U, // <6,3,u,1>: Cost 3 vsldoi4 <2,6,3,u>, <1,1,1,1>
+ 1559717819U, // <6,3,u,2>: Cost 2 vsldoi4 <2,6,3,u>, <2,6,3,u>
+ 1235674738U, // <6,3,u,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1559719222U, // <6,3,u,4>: Cost 2 vsldoi4 <2,6,3,u>, RHS
+ 1701366598U, // <6,3,u,5>: Cost 2 vsldoi12 <3,u,5,6>, <3,u,5,6>
+ 2633462353U, // <6,3,u,6>: Cost 3 vsldoi4 <2,6,3,u>, <6,3,u,0>
+ 1235675066U, // <6,3,u,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1559721774U, // <6,3,u,u>: Cost 2 vsldoi4 <2,6,3,u>, LHS
+ 3785777152U, // <6,4,0,0>: Cost 4 vsldoi8 <4,5,6,4>, <0,0,0,0>
+ 2712035430U, // <6,4,0,1>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3771179185U, // <6,4,0,2>: Cost 4 vsldoi8 <2,1,6,4>, <0,2,1,6>
+ 3846196096U, // <6,4,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <4,0,3,1>
+ 3785777490U, // <6,4,0,4>: Cost 4 vsldoi8 <4,5,6,4>, <0,4,1,5>
+ 2250517814U, // <6,4,0,5>: Cost 3 vmrghw <6,0,1,2>, RHS
+ 3324259703U, // <6,4,0,6>: Cost 4 vmrghw <6,0,1,2>, <4,6,5,0>
+ 3383092458U, // <6,4,0,7>: Cost 5 vmrglw <4,5,6,0>, <1,6,4,7>
+ 2712035997U, // <6,4,0,u>: Cost 3 vsldoi8 <4,5,6,4>, LHS
+ 3325356946U, // <6,4,1,0>: Cost 4 vmrghw <6,1,7,1>, <4,0,5,1>
+ 3785777972U, // <6,4,1,1>: Cost 4 vsldoi8 <4,5,6,4>, <1,1,1,1>
+ 3846196170U, // <6,4,1,2>: Cost 4 vsldoi12 <3,4,5,6>, <4,1,2,3>
+ 3325365380U, // <6,4,1,3>: Cost 4 vmrghw <6,1,7,2>, <4,3,5,0>
+ 3852168155U, // <6,4,1,4>: Cost 4 vsldoi12 <4,4,5,6>, <4,1,4,2>
+ 2251615542U, // <6,4,1,5>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 3325357432U, // <6,4,1,6>: Cost 4 vmrghw <6,1,7,1>, <4,6,5,1>
+ 3870084088U, // <6,4,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <4,1,7,4>
+ 2251615785U, // <6,4,1,u>: Cost 3 vmrghw <6,1,7,1>, RHS
+ 2252295058U, // <6,4,2,0>: Cost 3 vmrghw <6,2,7,3>, <4,0,5,1>
+ 3771180605U, // <6,4,2,1>: Cost 4 vsldoi8 <2,1,6,4>, <2,1,6,4>
+ 3785778792U, // <6,4,2,2>: Cost 4 vsldoi8 <4,5,6,4>, <2,2,2,2>
+ 3777816253U, // <6,4,2,3>: Cost 4 vsldoi8 <3,2,6,4>, <2,3,2,6>
+ 2252295376U, // <6,4,2,4>: Cost 3 vmrghw <6,2,7,3>, <4,4,4,4>
+ 1178553654U, // <6,4,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 2252295545U, // <6,4,2,6>: Cost 3 vmrghw <6,2,7,3>, <4,6,5,2>
+ 3326037448U, // <6,4,2,7>: Cost 4 vmrghw <6,2,7,3>, <4,7,5,0>
+ 1178553897U, // <6,4,2,u>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 3785779350U, // <6,4,3,0>: Cost 4 vsldoi8 <4,5,6,4>, <3,0,1,2>
+ 3383118648U, // <6,4,3,1>: Cost 4 vmrglw <4,5,6,3>, <3,u,4,1>
+ 3777816935U, // <6,4,3,2>: Cost 4 vsldoi8 <3,2,6,4>, <3,2,6,4>
+ 3785779612U, // <6,4,3,3>: Cost 4 vsldoi8 <4,5,6,4>, <3,3,3,3>
+ 2712037890U, // <6,4,3,4>: Cost 3 vsldoi8 <4,5,6,4>, <3,4,5,6>
+ 2252754230U, // <6,4,3,5>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3784452764U, // <6,4,3,6>: Cost 4 vsldoi8 <4,3,6,4>, <3,6,4,7>
+ 3801705178U, // <6,4,3,7>: Cost 4 vsldoi8 <7,2,6,4>, <3,7,2,6>
+ 2252754473U, // <6,4,3,u>: Cost 3 vmrghw <6,3,4,5>, RHS
+ 3787770770U, // <6,4,4,0>: Cost 4 vsldoi8 <4,u,6,4>, <4,0,5,1>
+ 3383126840U, // <6,4,4,1>: Cost 4 vmrglw <4,5,6,4>, <3,u,4,1>
+ 3327380534U, // <6,4,4,2>: Cost 4 vmrghw <6,4,7,5>, <4,2,5,3>
+ 3784453265U, // <6,4,4,3>: Cost 4 vsldoi8 <4,3,6,4>, <4,3,6,4>
+ 2253630672U, // <6,4,4,4>: Cost 3 vmrghw <6,4,7,4>, <4,4,4,4>
+ 2778426587U, // <6,4,4,5>: Cost 3 vsldoi12 <4,4,5,6>, <4,4,5,6>
+ 3383128789U, // <6,4,4,6>: Cost 4 vmrglw <4,5,6,4>, <6,5,4,6>
+ 3381799580U, // <6,4,4,7>: Cost 4 vmrglw <4,3,6,4>, <3,6,4,7>
+ 2778647798U, // <6,4,4,u>: Cost 3 vsldoi12 <4,4,u,6>, <4,4,u,6>
+ 2651422822U, // <6,4,5,0>: Cost 3 vsldoi4 <5,6,4,5>, LHS
+ 3701277928U, // <6,4,5,1>: Cost 4 vsldoi4 <1,6,4,5>, <1,6,4,5>
+ 3701278650U, // <6,4,5,2>: Cost 4 vsldoi4 <1,6,4,5>, <2,6,3,7>
+ 2651425282U, // <6,4,5,3>: Cost 3 vsldoi4 <5,6,4,5>, <3,4,5,6>
+ 2651426102U, // <6,4,5,4>: Cost 3 vsldoi4 <5,6,4,5>, RHS
+ 2651426892U, // <6,4,5,5>: Cost 3 vsldoi4 <5,6,4,5>, <5,6,4,5>
+ 1698712886U, // <6,4,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3725169658U, // <6,4,5,7>: Cost 4 vsldoi4 <5,6,4,5>, <7,0,1,2>
+ 1698712904U, // <6,4,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2254900114U, // <6,4,6,0>: Cost 3 vmrghw <6,6,6,6>, <4,0,5,1>
+ 3389115192U, // <6,4,6,1>: Cost 4 vmrglw <5,5,6,6>, <3,u,4,1>
+ 3785781727U, // <6,4,6,2>: Cost 4 vsldoi8 <4,5,6,4>, <6,2,4,3>
+ 3785781810U, // <6,4,6,3>: Cost 4 vsldoi8 <4,5,6,4>, <6,3,4,5>
+ 2254900432U, // <6,4,6,4>: Cost 3 vmrghw <6,6,6,6>, <4,4,4,4>
+ 1181158710U, // <6,4,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2254900605U, // <6,4,6,6>: Cost 3 vmrghw <6,6,6,6>, <4,6,5,6>
+ 3787772750U, // <6,4,6,7>: Cost 4 vsldoi8 <4,u,6,4>, <6,7,0,1>
+ 1181158953U, // <6,4,6,u>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 2639495270U, // <6,4,7,0>: Cost 3 vsldoi4 <3,6,4,7>, LHS
+ 2639496090U, // <6,4,7,1>: Cost 3 vsldoi4 <3,6,4,7>, <1,2,3,4>
+ 3707267011U, // <6,4,7,2>: Cost 4 vsldoi4 <2,6,4,7>, <2,6,4,7>
+ 2639497884U, // <6,4,7,3>: Cost 3 vsldoi4 <3,6,4,7>, <3,6,4,7>
+ 1237658832U, // <6,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235666638U, // <6,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 3713241753U, // <6,4,7,6>: Cost 4 vsldoi4 <3,6,4,7>, <6,4,7,0>
+ 2309409436U, // <6,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235666641U, // <6,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 2639503462U, // <6,4,u,0>: Cost 3 vsldoi4 <3,6,4,u>, LHS
+ 2639504282U, // <6,4,u,1>: Cost 3 vsldoi4 <3,6,4,u>, <1,2,3,4>
+ 3701303226U, // <6,4,u,2>: Cost 4 vsldoi4 <1,6,4,u>, <2,6,3,7>
+ 2639506077U, // <6,4,u,3>: Cost 3 vsldoi4 <3,6,4,u>, <3,6,4,u>
+ 1235676368U, // <6,4,u,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235674830U, // <6,4,u,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 1698713129U, // <6,4,u,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2309417628U, // <6,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1698713147U, // <6,4,u,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 3775832064U, // <6,5,0,0>: Cost 4 vsldoi8 <2,u,6,5>, <0,0,0,0>
+ 2702090342U, // <6,5,0,1>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3775832241U, // <6,5,0,2>: Cost 4 vsldoi8 <2,u,6,5>, <0,2,1,6>
+ 3719227906U, // <6,5,0,3>: Cost 4 vsldoi4 <4,6,5,0>, <3,4,5,6>
+ 3775832402U, // <6,5,0,4>: Cost 4 vsldoi8 <2,u,6,5>, <0,4,1,5>
+ 3385085146U, // <6,5,0,5>: Cost 4 vmrglw <4,u,6,0>, <4,4,5,5>
+ 2309351938U, // <6,5,0,6>: Cost 3 vmrglw <4,5,6,0>, <3,4,5,6>
+ 3376459134U, // <6,5,0,7>: Cost 5 vmrglw <3,4,6,0>, <4,6,5,7>
+ 2702090909U, // <6,5,0,u>: Cost 3 vsldoi8 <2,u,6,5>, LHS
+ 3719233546U, // <6,5,1,0>: Cost 4 vsldoi4 <4,6,5,1>, <0,0,1,1>
+ 3775832884U, // <6,5,1,1>: Cost 4 vsldoi8 <2,u,6,5>, <1,1,1,1>
+ 3775832982U, // <6,5,1,2>: Cost 4 vsldoi8 <2,u,6,5>, <1,2,3,0>
+ 3846196909U, // <6,5,1,3>: Cost 4 vsldoi12 <3,4,5,6>, <5,1,3,4>
+ 3719236984U, // <6,5,1,4>: Cost 4 vsldoi4 <4,6,5,1>, <4,6,5,1>
+ 3856150209U, // <6,5,1,5>: Cost 4 vsldoi12 <5,1,5,6>, <5,1,5,6>
+ 3834252997U, // <6,5,1,6>: Cost 4 vsldoi12 <1,4,5,6>, <5,1,6,1>
+ 3870084817U, // <6,5,1,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,1,7,4>
+ 3769861532U, // <6,5,1,u>: Cost 4 vsldoi8 <1,u,6,5>, <1,u,6,5>
+ 2645500006U, // <6,5,2,0>: Cost 3 vsldoi4 <4,6,5,2>, LHS
+ 3719242548U, // <6,5,2,1>: Cost 4 vsldoi4 <4,6,5,2>, <1,1,1,1>
+ 3775833704U, // <6,5,2,2>: Cost 4 vsldoi8 <2,u,6,5>, <2,2,2,2>
+ 3775833766U, // <6,5,2,3>: Cost 4 vsldoi8 <2,u,6,5>, <2,3,0,1>
+ 2645503353U, // <6,5,2,4>: Cost 3 vsldoi4 <4,6,5,2>, <4,6,5,2>
+ 2252296196U, // <6,5,2,5>: Cost 3 vmrghw <6,2,7,3>, <5,5,5,5>
+ 2702092218U, // <6,5,2,6>: Cost 3 vsldoi8 <2,u,6,5>, <2,6,3,7>
+ 3719246842U, // <6,5,2,7>: Cost 4 vsldoi4 <4,6,5,2>, <7,0,1,2>
+ 2702092405U, // <6,5,2,u>: Cost 3 vsldoi8 <2,u,6,5>, <2,u,6,5>
+ 3775834262U, // <6,5,3,0>: Cost 4 vsldoi8 <2,u,6,5>, <3,0,1,2>
+ 3777161495U, // <6,5,3,1>: Cost 4 vsldoi8 <3,1,6,5>, <3,1,6,5>
+ 3775834470U, // <6,5,3,2>: Cost 4 vsldoi8 <2,u,6,5>, <3,2,6,3>
+ 3775834524U, // <6,5,3,3>: Cost 4 vsldoi8 <2,u,6,5>, <3,3,3,3>
+ 3775834626U, // <6,5,3,4>: Cost 4 vsldoi8 <2,u,6,5>, <3,4,5,6>
+ 3385109722U, // <6,5,3,5>: Cost 4 vmrglw <4,u,6,3>, <4,4,5,5>
+ 2309376514U, // <6,5,3,6>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3775834819U, // <6,5,3,7>: Cost 4 vsldoi8 <2,u,6,5>, <3,7,0,1>
+ 2309376514U, // <6,5,3,u>: Cost 3 vmrglw <4,5,6,3>, <3,4,5,6>
+ 3719258214U, // <6,5,4,0>: Cost 4 vsldoi4 <4,6,5,4>, LHS
+ 3385117586U, // <6,5,4,1>: Cost 4 vmrglw <4,u,6,4>, <4,0,5,1>
+ 3327242008U, // <6,5,4,2>: Cost 4 vmrghw <6,4,5,6>, <5,2,6,3>
+ 3719260674U, // <6,5,4,3>: Cost 4 vsldoi4 <4,6,5,4>, <3,4,5,6>
+ 3719261563U, // <6,5,4,4>: Cost 4 vsldoi4 <4,6,5,4>, <4,6,5,4>
+ 2702093622U, // <6,5,4,5>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 2309384706U, // <6,5,4,6>: Cost 3 vmrglw <4,5,6,4>, <3,4,5,6>
+ 3870085060U, // <6,5,4,7>: Cost 4 vsldoi12 <7,4,5,6>, <5,4,7,4>
+ 2702093865U, // <6,5,4,u>: Cost 3 vsldoi8 <2,u,6,5>, RHS
+ 3719266406U, // <6,5,5,0>: Cost 4 vsldoi4 <4,6,5,5>, LHS
+ 3789106889U, // <6,5,5,1>: Cost 4 vsldoi8 <5,1,6,5>, <5,1,6,5>
+ 3785789208U, // <6,5,5,2>: Cost 4 vsldoi8 <4,5,6,5>, <5,2,6,3>
+ 3373183950U, // <6,5,5,3>: Cost 4 vmrglw <2,u,6,5>, <6,u,5,3>
+ 2717355964U, // <6,5,5,4>: Cost 3 vsldoi8 <5,4,6,5>, <5,4,6,5>
+ 2791772164U, // <6,5,5,5>: Cost 3 vsldoi12 <6,6,6,6>, <5,5,5,5>
+ 2772455438U, // <6,5,5,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,5,6,6>
+ 3373183549U, // <6,5,5,7>: Cost 4 vmrglw <2,u,6,5>, <6,3,5,7>
+ 2720010496U, // <6,5,5,u>: Cost 3 vsldoi8 <5,u,6,5>, <5,u,6,5>
+ 2772455460U, // <6,5,6,0>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,0,1>
+ 2322008978U, // <6,5,6,1>: Cost 3 vmrglw <6,6,6,6>, <4,0,5,1>
+ 3840225335U, // <6,5,6,2>: Cost 4 vsldoi12 <2,4,5,6>, <5,6,2,2>
+ 2772455490U, // <6,5,6,3>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,3,4>
+ 2772455500U, // <6,5,6,4>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,4,5>
+ 2254901252U, // <6,5,6,5>: Cost 3 vmrghw <6,6,6,6>, <5,5,5,5>
+ 2772455520U, // <6,5,6,6>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,6,7>
+ 2785874024U, // <6,5,6,7>: Cost 3 vsldoi12 <5,6,7,6>, <5,6,7,6>
+ 2772455532U, // <6,5,6,u>: Cost 3 vsldoi12 <3,4,5,6>, <5,6,u,1>
+ 2627625062U, // <6,5,7,0>: Cost 3 vsldoi4 <1,6,5,7>, LHS
+ 1235667858U, // <6,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309409278U, // <6,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309407659U, // <6,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627628342U, // <6,5,7,4>: Cost 3 vsldoi4 <1,6,5,7>, RHS
+ 1235668186U, // <6,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235667458U, // <6,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309407987U, // <6,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235667460U, // <6,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2627633254U, // <6,5,u,0>: Cost 3 vsldoi4 <1,6,5,u>, LHS
+ 1235676050U, // <6,5,u,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309417470U, // <6,5,u,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309415851U, // <6,5,u,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2627636534U, // <6,5,u,4>: Cost 3 vsldoi4 <1,6,5,u>, RHS
+ 1235676378U, // <6,5,u,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235675650U, // <6,5,u,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309416179U, // <6,5,u,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235675652U, // <6,5,u,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 2309352751U, // <6,6,0,0>: Cost 3 vmrglw <4,5,6,0>, <4,5,6,0>
+ 1650917478U, // <6,6,0,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 2250584570U, // <6,6,0,2>: Cost 3 vmrghw <6,0,2,1>, <6,2,7,3>
+ 3846197554U, // <6,6,0,3>: Cost 4 vsldoi12 <3,4,5,6>, <6,0,3,1>
+ 2724659538U, // <6,6,0,4>: Cost 3 vsldoi8 <6,6,6,6>, <0,4,1,5>
+ 3725275225U, // <6,6,0,5>: Cost 4 vsldoi4 <5,6,6,0>, <5,6,6,0>
+ 2791772493U, // <6,6,0,6>: Cost 3 vsldoi12 <6,6,6,6>, <6,0,6,1>
+ 2309352758U, // <6,6,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 1650918045U, // <6,6,0,u>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 3325358368U, // <6,6,1,0>: Cost 4 vmrghw <6,1,7,1>, <6,0,1,1>
+ 2299406449U, // <6,6,1,1>: Cost 3 vmrglw <2,u,6,1>, <2,u,6,1>
+ 2724660118U, // <6,6,1,2>: Cost 3 vsldoi8 <6,6,6,6>, <1,2,3,0>
+ 3373148518U, // <6,6,1,3>: Cost 4 vmrglw <2,u,6,1>, <3,2,6,3>
+ 3834253712U, // <6,6,1,4>: Cost 4 vsldoi12 <1,4,5,6>, <6,1,4,5>
+ 3373147953U, // <6,6,1,5>: Cost 4 vmrglw <2,u,6,1>, <2,4,6,5>
+ 2323297080U, // <6,6,1,6>: Cost 3 vmrglw <6,u,6,1>, <6,6,6,6>
+ 2299407670U, // <6,6,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2299407671U, // <6,6,1,u>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 2252296489U, // <6,6,2,0>: Cost 3 vmrghw <6,2,7,3>, <6,0,2,1>
+ 3326038394U, // <6,6,2,1>: Cost 4 vmrghw <6,2,7,3>, <6,1,2,1>
+ 1178554874U, // <6,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724660902U, // <6,6,2,3>: Cost 3 vsldoi8 <6,6,6,6>, <2,3,0,1>
+ 2252296817U, // <6,6,2,4>: Cost 3 vmrghw <6,2,7,3>, <6,4,2,5>
+ 3840741864U, // <6,6,2,5>: Cost 4 vsldoi12 <2,5,3,6>, <6,2,5,3>
+ 2252296976U, // <6,6,2,6>: Cost 3 vmrghw <6,2,7,3>, <6,6,2,2>
+ 2785874426U, // <6,6,2,7>: Cost 3 vsldoi12 <5,6,7,6>, <6,2,7,3>
+ 1178554874U, // <6,6,2,u>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2724661398U, // <6,6,3,0>: Cost 3 vsldoi8 <6,6,6,6>, <3,0,1,2>
+ 3375154665U, // <6,6,3,1>: Cost 4 vmrglw <3,2,6,3>, <2,0,6,1>
+ 3375154909U, // <6,6,3,2>: Cost 4 vmrglw <3,2,6,3>, <2,3,6,2>
+ 2301413734U, // <6,6,3,3>: Cost 3 vmrglw <3,2,6,3>, <3,2,6,3>
+ 2772455986U, // <6,6,3,4>: Cost 3 vsldoi12 <3,4,5,6>, <6,3,4,5>
+ 3375154993U, // <6,6,3,5>: Cost 4 vmrglw <3,2,6,3>, <2,4,6,5>
+ 2323313464U, // <6,6,3,6>: Cost 3 vmrglw <6,u,6,3>, <6,6,6,6>
+ 2301414710U, // <6,6,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2301414711U, // <6,6,3,u>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 2724662162U, // <6,6,4,0>: Cost 3 vsldoi8 <6,6,6,6>, <4,0,5,1>
+ 3326939559U, // <6,6,4,1>: Cost 4 vmrghw <6,4,1,5>, <6,1,7,1>
+ 2253271546U, // <6,6,4,2>: Cost 3 vmrghw <6,4,2,5>, <6,2,7,3>
+ 3383127346U, // <6,6,4,3>: Cost 4 vmrglw <4,5,6,4>, <4,5,6,3>
+ 2309385523U, // <6,6,4,4>: Cost 3 vmrglw <4,5,6,4>, <4,5,6,4>
+ 1650920758U, // <6,6,4,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 2724662653U, // <6,6,4,6>: Cost 3 vsldoi8 <6,6,6,6>, <4,6,5,6>
+ 2309385526U, // <6,6,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 1650921001U, // <6,6,4,u>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 3725312102U, // <6,6,5,0>: Cost 4 vsldoi4 <5,6,6,5>, LHS
+ 3373180393U, // <6,6,5,1>: Cost 4 vmrglw <2,u,6,5>, <2,0,6,1>
+ 3791769368U, // <6,6,5,2>: Cost 4 vsldoi8 <5,5,6,6>, <5,2,6,3>
+ 3373181286U, // <6,6,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,6,3>
+ 3725315382U, // <6,6,5,4>: Cost 4 vsldoi4 <5,6,6,5>, RHS
+ 2299439221U, // <6,6,5,5>: Cost 3 vmrglw <2,u,6,5>, <2,u,6,5>
+ 2724663394U, // <6,6,5,6>: Cost 3 vsldoi8 <6,6,6,6>, <5,6,7,0>
+ 2299440438U, // <6,6,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 2299440439U, // <6,6,5,u>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1583808614U, // <6,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <6,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2254574074U, // <6,6,6,2>: Cost 3 vmrghw <6,6,2,2>, <6,2,7,3>
+ 2322010609U, // <6,6,6,3>: Cost 3 vmrglw <6,6,6,6>, <6,2,6,3>
+ 1583811894U, // <6,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2322010773U, // <6,6,6,5>: Cost 3 vmrglw <6,6,6,6>, <6,4,6,5>
+ 363253046U, // <6,6,6,6>: Cost 1 vspltisw2 RHS
+ 1248267574U, // <6,6,6,7>: Cost 2 vmrglw <6,6,6,6>, RHS
+ 363253046U, // <6,6,6,u>: Cost 1 vspltisw2 RHS
+ 2309410095U, // <6,6,7,0>: Cost 3 vmrglw RHS, <4,5,6,0>
+ 2309408233U, // <6,6,7,1>: Cost 3 vmrglw RHS, <2,0,6,1>
+ 2311402373U, // <6,6,7,2>: Cost 3 vmrglw RHS, <6,7,6,2>
+ 2309409126U, // <6,6,7,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 2309410099U, // <6,6,7,4>: Cost 3 vmrglw RHS, <4,5,6,4>
+ 2309408561U, // <6,6,7,5>: Cost 3 vmrglw RHS, <2,4,6,5>
+ 1237660472U, // <6,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 161926454U, // <6,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 161926455U, // <6,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 1583808614U, // <6,6,u,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1650923310U, // <6,6,u,1>: Cost 2 vsldoi8 <6,6,6,6>, LHS
+ 1178554874U, // <6,6,u,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2309417318U, // <6,6,u,3>: Cost 3 vmrglw RHS, <3,2,6,3>
+ 1583811894U, // <6,6,u,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1650923674U, // <6,6,u,5>: Cost 2 vsldoi8 <6,6,6,6>, RHS
+ 363253046U, // <6,6,u,6>: Cost 1 vspltisw2 RHS
+ 161934646U, // <6,6,u,7>: Cost 1 vmrglw RHS, RHS
+ 161934647U, // <6,6,u,u>: Cost 1 vmrglw RHS, RHS
+ 1638318080U, // <6,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564576358U, // <6,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712060077U, // <6,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712060156U, // <6,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638318418U, // <6,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577865314U, // <6,7,0,5>: Cost 2 vsldoi4 <5,6,7,0>, <5,6,7,0>
+ 2712060406U, // <6,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2651608058U, // <6,7,0,7>: Cost 3 vsldoi4 <5,6,7,0>, <7,0,1,2>
+ 564576925U, // <6,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712060643U, // <6,7,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638318900U, // <6,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638318998U, // <6,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 3766559753U, // <6,7,1,3>: Cost 4 vsldoi8 <1,3,6,7>, <1,3,6,7>
+ 2712060971U, // <6,7,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712061039U, // <6,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712061135U, // <6,7,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 3373148612U, // <6,7,1,7>: Cost 4 vmrglw <2,u,6,1>, <3,3,7,7>
+ 1638319484U, // <6,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712061373U, // <6,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712061471U, // <6,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638319720U, // <6,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638319782U, // <6,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712061709U, // <6,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712061800U, // <6,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1638320058U, // <6,7,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252297836U, // <6,7,2,7>: Cost 3 vmrghw <6,2,7,3>, <7,7,7,7>
+ 1638320187U, // <6,7,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638320278U, // <6,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712062182U, // <6,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2712062256U, // <6,7,3,2>: Cost 3 vsldoi8 RHS, <3,2,0,3>
+ 1638320540U, // <6,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638320642U, // <6,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712062546U, // <6,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712062584U, // <6,7,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2712062659U, // <6,7,3,7>: Cost 3 vsldoi8 RHS, <3,7,0,1>
+ 1638320926U, // <6,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638321042U, // <6,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712062922U, // <6,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712063029U, // <6,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712063108U, // <6,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638321360U, // <6,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564579638U, // <6,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712063357U, // <6,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,6>
+ 2712063439U, // <6,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,7>
+ 564579881U, // <6,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712063560U, // <6,7,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714054287U, // <6,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712063742U, // <6,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 3373181295U, // <6,7,5,3>: Cost 4 vmrglw <2,u,6,5>, <3,2,7,3>
+ 2712063924U, // <6,7,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638322180U, // <6,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638322274U, // <6,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 3373181380U, // <6,7,5,7>: Cost 4 vmrglw <2,u,6,5>, <3,3,7,7>
+ 1640313092U, // <6,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712064289U, // <6,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712064423U, // <6,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638322682U, // <6,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712064562U, // <6,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712064653U, // <6,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712064747U, // <6,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638323000U, // <6,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638323022U, // <6,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638323168U, // <6,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,7,3>
+ 1237659746U, // <6,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309411158U, // <6,7,7,1>: Cost 3 vmrglw RHS, <6,0,7,1>
+ 2639718330U, // <6,7,7,2>: Cost 3 vsldoi4 <3,6,7,7>, <2,6,3,7>
+ 1235669498U, // <6,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1237659750U, // <6,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309411243U, // <6,7,7,5>: Cost 3 vmrglw RHS, <6,1,7,5>
+ 1583895362U, // <6,7,7,6>: Cost 2 vsldoi4 <6,6,7,7>, <6,6,7,7>
+ 1235669826U, // <6,7,7,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 1235669503U, // <6,7,7,u>: Cost 2 vmrglw RHS, <6,2,7,u>
+ 1638323923U, // <6,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564582190U, // <6,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638324101U, // <6,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638324156U, // <6,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638324287U, // <6,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564582554U, // <6,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638324432U, // <6,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 1235678018U, // <6,7,u,7>: Cost 2 vmrglw RHS, <6,6,7,7>
+ 564582757U, // <6,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 1638326272U, // <6,u,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564584550U, // <6,u,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712068269U, // <6,u,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2309349532U, // <6,u,0,3>: Cost 3 vmrglw <4,5,6,0>, LHS
+ 1638326610U, // <6,u,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1577939051U, // <6,u,0,5>: Cost 2 vsldoi4 <5,6,u,0>, <5,6,u,0>
+ 2712068598U, // <6,u,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2309352776U, // <6,u,0,7>: Cost 3 vmrglw <4,5,6,0>, RHS
+ 564585117U, // <6,u,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 2712068835U, // <6,u,1,0>: Cost 3 vsldoi8 RHS, <1,0,1,1>
+ 1638327092U, // <6,u,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1698715438U, // <6,u,1,2>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2299404444U, // <6,u,1,3>: Cost 3 vmrglw <2,u,6,1>, LHS
+ 2712069163U, // <6,u,1,4>: Cost 3 vsldoi8 RHS, <1,4,1,5>
+ 2712069231U, // <6,u,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2712069327U, // <6,u,1,6>: Cost 3 vsldoi8 RHS, <1,6,1,7>
+ 2299407688U, // <6,u,1,7>: Cost 3 vmrglw <2,u,6,1>, RHS
+ 1698715492U, // <6,u,1,u>: Cost 2 vsldoi12 <3,4,5,6>, LHS
+ 2712069565U, // <6,u,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 1178556206U, // <6,u,2,1>: Cost 2 vmrghw <6,2,7,3>, LHS
+ 1638327912U, // <6,u,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638327974U, // <6,u,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712069901U, // <6,u,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 1178556570U, // <6,u,2,5>: Cost 2 vmrghw <6,2,7,3>, RHS
+ 1638328250U, // <6,u,2,6>: Cost 2 vsldoi8 RHS, <2,6,3,7>
+ 2252298496U, // <6,u,2,7>: Cost 3 vmrghw <6,2,7,3>, <u,7,0,1>
+ 1638328379U, // <6,u,2,u>: Cost 2 vsldoi8 RHS, <2,u,0,1>
+ 1638328470U, // <6,u,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712070374U, // <6,u,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2704107883U, // <6,u,3,2>: Cost 3 vsldoi8 <3,2,6,u>, <3,2,6,u>
+ 1638328732U, // <6,u,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638328834U, // <6,u,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712070738U, // <6,u,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 2712070776U, // <6,u,3,6>: Cost 3 vsldoi8 RHS, <3,6,0,7>
+ 2301414728U, // <6,u,3,7>: Cost 3 vmrglw <3,2,6,3>, RHS
+ 1638329118U, // <6,u,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638329234U, // <6,u,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712071114U, // <6,u,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712071221U, // <6,u,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2309382300U, // <6,u,4,3>: Cost 3 vmrglw <4,5,6,4>, LHS
+ 1638329552U, // <6,u,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564587831U, // <6,u,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712071545U, // <6,u,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2309385544U, // <6,u,4,7>: Cost 3 vmrglw <4,5,6,4>, RHS
+ 564588073U, // <6,u,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 2712071752U, // <6,u,5,0>: Cost 3 vsldoi8 RHS, <5,0,1,2>
+ 2714062479U, // <6,u,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712071934U, // <6,u,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2299437212U, // <6,u,5,3>: Cost 3 vmrglw <2,u,6,5>, LHS
+ 2712072116U, // <6,u,5,4>: Cost 3 vsldoi8 RHS, <5,4,5,6>
+ 1638330372U, // <6,u,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1698715802U, // <6,u,5,6>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 2299440456U, // <6,u,5,7>: Cost 3 vmrglw <2,u,6,5>, RHS
+ 1698715820U, // <6,u,5,u>: Cost 2 vsldoi12 <3,4,5,6>, RHS
+ 1583808614U, // <6,u,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 1181161262U, // <6,u,6,1>: Cost 2 vmrghw <6,6,6,6>, LHS
+ 1638330874U, // <6,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1248264348U, // <6,u,6,3>: Cost 2 vmrglw <6,6,6,6>, LHS
+ 1583811894U, // <6,u,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 1181161626U, // <6,u,6,5>: Cost 2 vmrghw <6,6,6,6>, RHS
+ 363253046U, // <6,u,6,6>: Cost 1 vspltisw2 RHS
+ 1638331214U, // <6,u,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 363253046U, // <6,u,6,u>: Cost 1 vspltisw2 RHS
+ 1560076390U, // <6,u,7,0>: Cost 2 vsldoi4 <2,6,u,7>, LHS
+ 1235664969U, // <6,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1560078311U, // <6,u,7,2>: Cost 2 vsldoi4 <2,6,u,7>, <2,6,u,7>
+ 161923228U, // <6,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 1560079670U, // <6,u,7,4>: Cost 2 vsldoi4 <2,6,u,7>, RHS
+ 1235665297U, // <6,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235667485U, // <6,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 161926472U, // <6,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 161923233U, // <6,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 1560084582U, // <6,u,u,0>: Cost 2 vsldoi4 <2,6,u,u>, LHS
+ 564590382U, // <6,u,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1560086504U, // <6,u,u,2>: Cost 2 vsldoi4 <2,6,u,u>, <2,6,u,u>
+ 161931420U, // <6,u,u,3>: Cost 1 vmrglw RHS, LHS
+ 1560087862U, // <6,u,u,4>: Cost 2 vsldoi4 <2,6,u,u>, RHS
+ 564590746U, // <6,u,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 363253046U, // <6,u,u,6>: Cost 1 vspltisw2 RHS
+ 161934664U, // <6,u,u,7>: Cost 1 vmrglw RHS, RHS
+ 161931425U, // <6,u,u,u>: Cost 1 vmrglw RHS, LHS
+ 1705426944U, // <7,0,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705426954U, // <7,0,0,1>: Cost 2 vsldoi12 RHS, <0,0,1,1>
+ 3713550266U, // <7,0,0,2>: Cost 4 vsldoi4 <3,7,0,0>, <2,6,3,7>
+ 2316063892U, // <7,0,0,3>: Cost 3 vmrglw <5,6,7,0>, <7,2,0,3>
+ 2779168805U, // <7,0,0,4>: Cost 3 vsldoi12 RHS, <0,0,4,1>
+ 2663698530U, // <7,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2657727309U, // <7,0,0,6>: Cost 3 vsldoi4 <6,7,0,0>, <6,7,0,0>
+ 2316064220U, // <7,0,0,7>: Cost 3 vmrglw <5,6,7,0>, <7,6,0,7>
+ 1705427017U, // <7,0,0,u>: Cost 2 vsldoi12 RHS, <0,0,u,1>
+ 1583988838U, // <7,0,1,0>: Cost 2 vsldoi4 <6,7,0,1>, LHS
+ 2779168859U, // <7,0,1,1>: Cost 3 vsldoi12 RHS, <0,1,1,1>
+ 631685222U, // <7,0,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639817411U, // <7,0,1,3>: Cost 3 vsldoi4 <3,7,0,1>, <3,7,0,1>
+ 1583992118U, // <7,0,1,4>: Cost 2 vsldoi4 <6,7,0,1>, RHS
+ 2657734660U, // <7,0,1,5>: Cost 3 vsldoi4 <6,7,0,1>, <5,5,5,5>
+ 1583993678U, // <7,0,1,6>: Cost 2 vsldoi4 <6,7,0,1>, <6,7,0,1>
+ 2657735672U, // <7,0,1,7>: Cost 3 vsldoi4 <6,7,0,1>, <7,0,1,0>
+ 631685276U, // <7,0,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779168933U, // <7,0,2,0>: Cost 3 vsldoi12 RHS, <0,2,0,3>
+ 2767667377U, // <7,0,2,1>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,1,6>
+ 2718713448U, // <7,0,2,2>: Cost 3 vsldoi8 <5,6,7,0>, <2,2,2,2>
+ 2718713510U, // <7,0,2,3>: Cost 3 vsldoi8 <5,6,7,0>, <2,3,0,1>
+ 3841409228U, // <7,0,2,4>: Cost 4 vsldoi12 <2,6,3,7>, <0,2,4,6>
+ 3852910802U, // <7,0,2,5>: Cost 4 vsldoi12 RHS, <0,2,5,3>
+ 2718713786U, // <7,0,2,6>: Cost 3 vsldoi8 <5,6,7,0>, <2,6,3,7>
+ 3847160036U, // <7,0,2,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,2,7,3>
+ 2767667440U, // <7,0,2,u>: Cost 3 vsldoi12 <2,6,3,7>, <0,2,u,6>
+ 2718714006U, // <7,0,3,0>: Cost 3 vsldoi8 <5,6,7,0>, <3,0,1,2>
+ 2779169020U, // <7,0,3,1>: Cost 3 vsldoi12 RHS, <0,3,1,0>
+ 3852910853U, // <7,0,3,2>: Cost 4 vsldoi12 RHS, <0,3,2,0>
+ 2718714268U, // <7,0,3,3>: Cost 3 vsldoi8 <5,6,7,0>, <3,3,3,3>
+ 2718714370U, // <7,0,3,4>: Cost 3 vsldoi8 <5,6,7,0>, <3,4,5,6>
+ 2718714461U, // <7,0,3,5>: Cost 3 vsldoi8 <5,6,7,0>, <3,5,6,7>
+ 2706770608U, // <7,0,3,6>: Cost 3 vsldoi8 <3,6,7,0>, <3,6,7,0>
+ 3847160114U, // <7,0,3,7>: Cost 4 vsldoi12 <3,6,0,7>, <0,3,7,0>
+ 2779169083U, // <7,0,3,u>: Cost 3 vsldoi12 RHS, <0,3,u,0>
+ 2718714770U, // <7,0,4,0>: Cost 3 vsldoi8 <5,6,7,0>, <4,0,5,1>
+ 1705427282U, // <7,0,4,1>: Cost 2 vsldoi12 RHS, <0,4,1,5>
+ 3713583034U, // <7,0,4,2>: Cost 4 vsldoi4 <3,7,0,4>, <2,6,3,7>
+ 3713583814U, // <7,0,4,3>: Cost 4 vsldoi4 <3,7,0,4>, <3,7,0,4>
+ 2779169133U, // <7,0,4,4>: Cost 3 vsldoi12 RHS, <0,4,4,5>
+ 1644973366U, // <7,0,4,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 2657760081U, // <7,0,4,6>: Cost 3 vsldoi4 <6,7,0,4>, <6,7,0,4>
+ 2259468868U, // <7,0,4,7>: Cost 3 vmrghw <7,4,5,6>, <0,7,1,4>
+ 1705427345U, // <7,0,4,u>: Cost 2 vsldoi12 RHS, <0,4,u,5>
+ 2718715508U, // <7,0,5,0>: Cost 3 vsldoi8 <5,6,7,0>, <5,0,6,1>
+ 2260123750U, // <7,0,5,1>: Cost 3 vmrghw <7,5,5,5>, LHS
+ 3792457451U, // <7,0,5,2>: Cost 4 vsldoi8 <5,6,7,0>, <5,2,1,3>
+ 3852911024U, // <7,0,5,3>: Cost 4 vsldoi12 RHS, <0,5,3,0>
+ 2718715836U, // <7,0,5,4>: Cost 3 vsldoi8 <5,6,7,0>, <5,4,6,5>
+ 2718715908U, // <7,0,5,5>: Cost 3 vsldoi8 <5,6,7,0>, <5,5,5,5>
+ 1644974178U, // <7,0,5,6>: Cost 2 vsldoi8 <5,6,7,0>, <5,6,7,0>
+ 3792457853U, // <7,0,5,7>: Cost 4 vsldoi8 <5,6,7,0>, <5,7,1,0>
+ 1646301444U, // <7,0,5,u>: Cost 2 vsldoi8 <5,u,7,0>, <5,u,7,0>
+ 2720706901U, // <7,0,6,0>: Cost 3 vsldoi8 <6,0,7,0>, <6,0,7,0>
+ 2779169270U, // <7,0,6,1>: Cost 3 vsldoi12 RHS, <0,6,1,7>
+ 2718716410U, // <7,0,6,2>: Cost 3 vsldoi8 <5,6,7,0>, <6,2,7,3>
+ 2722697800U, // <7,0,6,3>: Cost 3 vsldoi8 <6,3,7,0>, <6,3,7,0>
+ 3852911121U, // <7,0,6,4>: Cost 4 vsldoi12 RHS, <0,6,4,7>
+ 3852911130U, // <7,0,6,5>: Cost 4 vsldoi12 RHS, <0,6,5,7>
+ 2718716728U, // <7,0,6,6>: Cost 3 vsldoi8 <5,6,7,0>, <6,6,6,6>
+ 2718716750U, // <7,0,6,7>: Cost 3 vsldoi8 <5,6,7,0>, <6,7,0,1>
+ 2779169333U, // <7,0,6,u>: Cost 3 vsldoi12 RHS, <0,6,u,7>
+ 2718716922U, // <7,0,7,0>: Cost 3 vsldoi8 <5,6,7,0>, <7,0,1,2>
+ 1187872870U, // <7,0,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2718717076U, // <7,0,7,2>: Cost 3 vsldoi8 <5,6,7,0>, <7,2,0,3>
+ 3847160408U, // <7,0,7,3>: Cost 4 vsldoi12 <3,6,0,7>, <0,7,3,6>
+ 2718717286U, // <7,0,7,4>: Cost 3 vsldoi8 <5,6,7,0>, <7,4,5,6>
+ 2718717377U, // <7,0,7,5>: Cost 3 vsldoi8 <5,6,7,0>, <7,5,6,7>
+ 2718717404U, // <7,0,7,6>: Cost 3 vsldoi8 <5,6,7,0>, <7,6,0,7>
+ 2718717478U, // <7,0,7,7>: Cost 3 vsldoi8 <5,6,7,0>, <7,7,0,0>
+ 1187873437U, // <7,0,7,u>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 1584046182U, // <7,0,u,0>: Cost 2 vsldoi4 <6,7,0,u>, LHS
+ 1705427602U, // <7,0,u,1>: Cost 2 vsldoi12 RHS, <0,u,1,1>
+ 631685789U, // <7,0,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 2639874762U, // <7,0,u,3>: Cost 3 vsldoi4 <3,7,0,u>, <3,7,0,u>
+ 1584049462U, // <7,0,u,4>: Cost 2 vsldoi4 <6,7,0,u>, RHS
+ 1644976282U, // <7,0,u,5>: Cost 2 vsldoi8 <5,6,7,0>, RHS
+ 1584051029U, // <7,0,u,6>: Cost 2 vsldoi4 <6,7,0,u>, <6,7,0,u>
+ 2718718208U, // <7,0,u,7>: Cost 3 vsldoi8 <5,6,7,0>, <u,7,0,1>
+ 631685843U, // <7,0,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 2721374218U, // <7,1,0,0>: Cost 3 vsldoi8 <6,1,7,1>, <0,0,1,1>
+ 2779169507U, // <7,1,0,1>: Cost 3 vsldoi12 RHS, <1,0,1,1>
+ 2779169516U, // <7,1,0,2>: Cost 3 vsldoi12 RHS, <1,0,2,1>
+ 3852911348U, // <7,1,0,3>: Cost 4 vsldoi12 RHS, <1,0,3,0>
+ 2669743414U, // <7,1,0,4>: Cost 3 vsldoi4 <u,7,1,0>, RHS
+ 2316058962U, // <7,1,0,5>: Cost 3 vmrglw <5,6,7,0>, <0,4,1,5>
+ 2316059044U, // <7,1,0,6>: Cost 3 vmrglw <5,6,7,0>, <0,5,1,6>
+ 2669745146U, // <7,1,0,7>: Cost 3 vsldoi4 <u,7,1,0>, <7,0,1,2>
+ 2779169570U, // <7,1,0,u>: Cost 3 vsldoi12 RHS, <1,0,u,1>
+ 2779169579U, // <7,1,1,0>: Cost 3 vsldoi12 RHS, <1,1,0,1>
+ 1705427764U, // <7,1,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169598U, // <7,1,1,2>: Cost 3 vsldoi12 RHS, <1,1,2,2>
+ 3713632972U, // <7,1,1,3>: Cost 4 vsldoi4 <3,7,1,1>, <3,7,1,1>
+ 2779169619U, // <7,1,1,4>: Cost 3 vsldoi12 RHS, <1,1,4,5>
+ 2779169628U, // <7,1,1,5>: Cost 3 vsldoi12 RHS, <1,1,5,5>
+ 2657809239U, // <7,1,1,6>: Cost 3 vsldoi4 <6,7,1,1>, <6,7,1,1>
+ 3835290474U, // <7,1,1,7>: Cost 4 vsldoi12 <1,6,1,7>, <1,1,7,1>
+ 1705427764U, // <7,1,1,u>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779169660U, // <7,1,2,0>: Cost 3 vsldoi12 RHS, <1,2,0,1>
+ 2779169671U, // <7,1,2,1>: Cost 3 vsldoi12 RHS, <1,2,1,3>
+ 2779169680U, // <7,1,2,2>: Cost 3 vsldoi12 RHS, <1,2,2,3>
+ 1705427862U, // <7,1,2,3>: Cost 2 vsldoi12 RHS, <1,2,3,0>
+ 2779169700U, // <7,1,2,4>: Cost 3 vsldoi12 RHS, <1,2,4,5>
+ 2779169707U, // <7,1,2,5>: Cost 3 vsldoi12 RHS, <1,2,5,3>
+ 2657817432U, // <7,1,2,6>: Cost 3 vsldoi4 <6,7,1,2>, <6,7,1,2>
+ 2803057594U, // <7,1,2,7>: Cost 3 vsldoi12 RHS, <1,2,7,0>
+ 1705427907U, // <7,1,2,u>: Cost 2 vsldoi12 RHS, <1,2,u,0>
+ 3776538827U, // <7,1,3,0>: Cost 4 vsldoi8 <3,0,7,1>, <3,0,7,1>
+ 2319400970U, // <7,1,3,1>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,1>
+ 2316085398U, // <7,1,3,2>: Cost 3 vmrglw <5,6,7,3>, <3,0,1,2>
+ 3852911591U, // <7,1,3,3>: Cost 4 vsldoi12 RHS, <1,3,3,0>
+ 3852911600U, // <7,1,3,4>: Cost 4 vsldoi12 RHS, <1,3,4,0>
+ 2319401298U, // <7,1,3,5>: Cost 3 vmrglw <6,2,7,3>, <0,4,1,5>
+ 3833668617U, // <7,1,3,6>: Cost 4 vsldoi12 <1,3,6,7>, <1,3,6,7>
+ 3367265487U, // <7,1,3,7>: Cost 4 vmrglw <1,u,7,3>, <1,6,1,7>
+ 2319400977U, // <7,1,3,u>: Cost 3 vmrglw <6,2,7,3>, <0,0,1,u>
+ 2724031378U, // <7,1,4,0>: Cost 3 vsldoi8 <6,5,7,1>, <4,0,5,1>
+ 2779169835U, // <7,1,4,1>: Cost 3 vsldoi12 RHS, <1,4,1,5>
+ 2779169844U, // <7,1,4,2>: Cost 3 vsldoi12 RHS, <1,4,2,5>
+ 3852911672U, // <7,1,4,3>: Cost 4 vsldoi12 RHS, <1,4,3,0>
+ 2669776182U, // <7,1,4,4>: Cost 3 vsldoi4 <u,7,1,4>, RHS
+ 2779169872U, // <7,1,4,5>: Cost 3 vsldoi12 RHS, <1,4,5,6>
+ 3835290712U, // <7,1,4,6>: Cost 4 vsldoi12 <1,6,1,7>, <1,4,6,5>
+ 2669778278U, // <7,1,4,7>: Cost 3 vsldoi4 <u,7,1,4>, <7,4,5,6>
+ 2779169898U, // <7,1,4,u>: Cost 3 vsldoi12 RHS, <1,4,u,5>
+ 2779169903U, // <7,1,5,0>: Cost 3 vsldoi12 RHS, <1,5,0,1>
+ 3835585661U, // <7,1,5,1>: Cost 4 vsldoi12 <1,6,5,7>, <1,5,1,6>
+ 3841410182U, // <7,1,5,2>: Cost 4 vsldoi12 <2,6,3,7>, <1,5,2,6>
+ 3852911753U, // <7,1,5,3>: Cost 4 vsldoi12 RHS, <1,5,3,0>
+ 2779169943U, // <7,1,5,4>: Cost 3 vsldoi12 RHS, <1,5,4,5>
+ 2318754130U, // <7,1,5,5>: Cost 3 vmrglw <6,1,7,5>, <0,4,1,5>
+ 2718724195U, // <7,1,5,6>: Cost 3 vsldoi8 <5,6,7,1>, <5,6,7,1>
+ 3859178670U, // <7,1,5,7>: Cost 4 vsldoi12 <5,6,1,7>, <1,5,7,1>
+ 2779169975U, // <7,1,5,u>: Cost 3 vsldoi12 RHS, <1,5,u,1>
+ 2720715094U, // <7,1,6,0>: Cost 3 vsldoi8 <6,0,7,1>, <6,0,7,1>
+ 2761549007U, // <7,1,6,1>: Cost 3 vsldoi12 <1,6,1,7>, <1,6,1,7>
+ 2779170008U, // <7,1,6,2>: Cost 3 vsldoi12 RHS, <1,6,2,7>
+ 3835438305U, // <7,1,6,3>: Cost 4 vsldoi12 <1,6,3,7>, <1,6,3,7>
+ 3835512042U, // <7,1,6,4>: Cost 4 vsldoi12 <1,6,4,7>, <1,6,4,7>
+ 2761843955U, // <7,1,6,5>: Cost 3 vsldoi12 <1,6,5,7>, <1,6,5,7>
+ 3835659516U, // <7,1,6,6>: Cost 4 vsldoi12 <1,6,6,7>, <1,6,6,7>
+ 2803057918U, // <7,1,6,7>: Cost 3 vsldoi12 RHS, <1,6,7,0>
+ 2762065166U, // <7,1,6,u>: Cost 3 vsldoi12 <1,6,u,7>, <1,6,u,7>
+ 2669797478U, // <7,1,7,0>: Cost 3 vsldoi4 <u,7,1,7>, LHS
+ 2322087946U, // <7,1,7,1>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,1>
+ 2317448186U, // <7,1,7,2>: Cost 3 vmrglw <5,u,7,7>, <7,0,1,2>
+ 3395829934U, // <7,1,7,3>: Cost 4 vmrglw <6,6,7,7>, <0,2,1,3>
+ 2669800758U, // <7,1,7,4>: Cost 3 vsldoi4 <u,7,1,7>, RHS
+ 2322088274U, // <7,1,7,5>: Cost 3 vmrglw <6,6,7,7>, <0,4,1,5>
+ 3375923377U, // <7,1,7,6>: Cost 4 vmrglw <3,3,7,7>, <0,2,1,6>
+ 2731996780U, // <7,1,7,7>: Cost 3 vsldoi8 <7,u,7,1>, <7,7,7,7>
+ 2322087953U, // <7,1,7,u>: Cost 3 vmrglw <6,6,7,7>, <0,0,1,u>
+ 2779170146U, // <7,1,u,0>: Cost 3 vsldoi12 RHS, <1,u,0,1>
+ 1705427764U, // <7,1,u,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 2779170164U, // <7,1,u,2>: Cost 3 vsldoi12 RHS, <1,u,2,1>
+ 1705428348U, // <7,1,u,3>: Cost 2 vsldoi12 RHS, <1,u,3,0>
+ 2779170186U, // <7,1,u,4>: Cost 3 vsldoi12 RHS, <1,u,4,5>
+ 2763171221U, // <7,1,u,5>: Cost 3 vsldoi12 <1,u,5,7>, <1,u,5,7>
+ 2657866590U, // <7,1,u,6>: Cost 3 vsldoi4 <6,7,1,u>, <6,7,1,u>
+ 2803058080U, // <7,1,u,7>: Cost 3 vsldoi12 RHS, <1,u,7,0>
+ 1705428393U, // <7,1,u,u>: Cost 2 vsldoi12 RHS, <1,u,u,0>
+ 3713695846U, // <7,2,0,0>: Cost 4 vsldoi4 <3,7,2,0>, LHS
+ 2779170237U, // <7,2,0,1>: Cost 3 vsldoi12 RHS, <2,0,1,2>
+ 2779170245U, // <7,2,0,2>: Cost 3 vsldoi12 RHS, <2,0,2,1>
+ 1242316902U, // <7,2,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3713699126U, // <7,2,0,4>: Cost 4 vsldoi4 <3,7,2,0>, RHS
+ 3852912096U, // <7,2,0,5>: Cost 4 vsldoi12 RHS, <2,0,5,1>
+ 2767668713U, // <7,2,0,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,0,6,1>
+ 2256488426U, // <7,2,0,7>: Cost 3 vmrghw <7,0,1,2>, <2,7,0,1>
+ 1242316907U, // <7,2,0,u>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 3852912132U, // <7,2,1,0>: Cost 4 vsldoi12 RHS, <2,1,0,1>
+ 3852912141U, // <7,2,1,1>: Cost 4 vsldoi12 RHS, <2,1,1,1>
+ 3852912149U, // <7,2,1,2>: Cost 4 vsldoi12 RHS, <2,1,2,0>
+ 2779170335U, // <7,2,1,3>: Cost 3 vsldoi12 RHS, <2,1,3,1>
+ 3852912172U, // <7,2,1,4>: Cost 4 vsldoi12 RHS, <2,1,4,5>
+ 3840747062U, // <7,2,1,5>: Cost 5 vsldoi12 <2,5,3,7>, <2,1,5,6>
+ 3841410617U, // <7,2,1,6>: Cost 4 vsldoi12 <2,6,3,7>, <2,1,6,0>
+ 3795125538U, // <7,2,1,7>: Cost 4 vsldoi8 <6,1,7,2>, <1,7,2,0>
+ 2779170380U, // <7,2,1,u>: Cost 3 vsldoi12 RHS, <2,1,u,1>
+ 2779170389U, // <7,2,2,0>: Cost 3 vsldoi12 RHS, <2,2,0,1>
+ 3852912222U, // <7,2,2,1>: Cost 4 vsldoi12 RHS, <2,2,1,1>
+ 1705428584U, // <7,2,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705428594U, // <7,2,2,3>: Cost 2 vsldoi12 RHS, <2,2,3,3>
+ 2779170429U, // <7,2,2,4>: Cost 3 vsldoi12 RHS, <2,2,4,5>
+ 3852912259U, // <7,2,2,5>: Cost 4 vsldoi12 RHS, <2,2,5,2>
+ 2767668880U, // <7,2,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,2,6,6>
+ 3841336981U, // <7,2,2,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,2,7,2>
+ 1705428639U, // <7,2,2,u>: Cost 2 vsldoi12 RHS, <2,2,u,3>
+ 1705428646U, // <7,2,3,0>: Cost 2 vsldoi12 RHS, <2,3,0,1>
+ 2779170479U, // <7,2,3,1>: Cost 3 vsldoi12 RHS, <2,3,1,1>
+ 2767668925U, // <7,2,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <2,3,2,6>
+ 1245659238U, // <7,2,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705428686U, // <7,2,3,4>: Cost 2 vsldoi12 RHS, <2,3,4,5>
+ 2779170519U, // <7,2,3,5>: Cost 3 vsldoi12 RHS, <2,3,5,5>
+ 2657899362U, // <7,2,3,6>: Cost 3 vsldoi4 <6,7,2,3>, <6,7,2,3>
+ 2319406574U, // <7,2,3,7>: Cost 3 vmrglw <6,2,7,3>, <7,6,2,7>
+ 1705428718U, // <7,2,3,u>: Cost 2 vsldoi12 RHS, <2,3,u,1>
+ 3713728614U, // <7,2,4,0>: Cost 4 vsldoi4 <3,7,2,4>, LHS
+ 3852912388U, // <7,2,4,1>: Cost 4 vsldoi12 RHS, <2,4,1,5>
+ 2779170573U, // <7,2,4,2>: Cost 3 vsldoi12 RHS, <2,4,2,5>
+ 1242349670U, // <7,2,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3713731894U, // <7,2,4,4>: Cost 4 vsldoi4 <3,7,2,4>, RHS
+ 2779170601U, // <7,2,4,5>: Cost 3 vsldoi12 RHS, <2,4,5,6>
+ 2767669041U, // <7,2,4,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,4,6,5>
+ 3389834456U, // <7,2,4,7>: Cost 4 vmrglw <5,6,7,4>, <1,6,2,7>
+ 1242349675U, // <7,2,4,u>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 3852912456U, // <7,2,5,0>: Cost 4 vsldoi12 RHS, <2,5,0,1>
+ 3852912466U, // <7,2,5,1>: Cost 4 vsldoi12 RHS, <2,5,1,2>
+ 3852912475U, // <7,2,5,2>: Cost 4 vsldoi12 RHS, <2,5,2,2>
+ 2779170664U, // <7,2,5,3>: Cost 3 vsldoi12 RHS, <2,5,3,6>
+ 3852912496U, // <7,2,5,4>: Cost 4 vsldoi12 RHS, <2,5,4,5>
+ 3792474116U, // <7,2,5,5>: Cost 4 vsldoi8 <5,6,7,2>, <5,5,5,5>
+ 2718732388U, // <7,2,5,6>: Cost 3 vsldoi8 <5,6,7,2>, <5,6,7,2>
+ 3841337228U, // <7,2,5,7>: Cost 5 vsldoi12 <2,6,2,7>, <2,5,7,6>
+ 2779170709U, // <7,2,5,u>: Cost 3 vsldoi12 RHS, <2,5,u,6>
+ 2640003174U, // <7,2,6,0>: Cost 3 vsldoi4 <3,7,2,6>, LHS
+ 2721386920U, // <7,2,6,1>: Cost 3 vsldoi8 <6,1,7,2>, <6,1,7,2>
+ 2767595441U, // <7,2,6,2>: Cost 3 vsldoi12 <2,6,2,7>, <2,6,2,7>
+ 1693927354U, // <7,2,6,3>: Cost 2 vsldoi12 <2,6,3,7>, <2,6,3,7>
+ 2640006454U, // <7,2,6,4>: Cost 3 vsldoi4 <3,7,2,6>, RHS
+ 3841558476U, // <7,2,6,5>: Cost 4 vsldoi12 <2,6,5,7>, <2,6,5,7>
+ 2657923941U, // <7,2,6,6>: Cost 3 vsldoi4 <6,7,2,6>, <6,7,2,6>
+ 3841337310U, // <7,2,6,7>: Cost 4 vsldoi12 <2,6,2,7>, <2,6,7,7>
+ 1694296039U, // <7,2,6,u>: Cost 2 vsldoi12 <2,6,u,7>, <2,6,u,7>
+ 2803058666U, // <7,2,7,0>: Cost 3 vsldoi12 RHS, <2,7,0,1>
+ 3852912632U, // <7,2,7,1>: Cost 4 vsldoi12 RHS, <2,7,1,6>
+ 2322089576U, // <7,2,7,2>: Cost 3 vmrglw <6,6,7,7>, <2,2,2,2>
+ 1248346214U, // <7,2,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 3841337362U, // <7,2,7,4>: Cost 4 vsldoi12 <2,6,2,7>, <2,7,4,5>
+ 3395830836U, // <7,2,7,5>: Cost 4 vmrglw <6,6,7,7>, <1,4,2,5>
+ 2261616570U, // <7,2,7,6>: Cost 3 vmrghw <7,7,7,7>, <2,6,3,7>
+ 3371943857U, // <7,2,7,7>: Cost 4 vmrglw <2,6,7,7>, <2,6,2,7>
+ 1248346219U, // <7,2,7,u>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705429051U, // <7,2,u,0>: Cost 2 vsldoi12 RHS, <2,u,0,1>
+ 2779170884U, // <7,2,u,1>: Cost 3 vsldoi12 RHS, <2,u,1,1>
+ 1705428584U, // <7,2,u,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1695254620U, // <7,2,u,3>: Cost 2 vsldoi12 <2,u,3,7>, <2,u,3,7>
+ 1705429091U, // <7,2,u,4>: Cost 2 vsldoi12 RHS, <2,u,4,5>
+ 2779170924U, // <7,2,u,5>: Cost 3 vsldoi12 RHS, <2,u,5,5>
+ 2767669361U, // <7,2,u,6>: Cost 3 vsldoi12 <2,6,3,7>, <2,u,6,1>
+ 2803058809U, // <7,2,u,7>: Cost 3 vsldoi12 RHS, <2,u,7,0>
+ 1695623305U, // <7,2,u,u>: Cost 2 vsldoi12 <2,u,u,7>, <2,u,u,7>
+ 2779170955U, // <7,3,0,0>: Cost 3 vsldoi12 RHS, <3,0,0,0>
+ 1705429142U, // <7,3,0,1>: Cost 2 vsldoi12 RHS, <3,0,1,2>
+ 2634057732U, // <7,3,0,2>: Cost 3 vsldoi4 <2,7,3,0>, <2,7,3,0>
+ 2779170983U, // <7,3,0,3>: Cost 3 vsldoi12 RHS, <3,0,3,1>
+ 2779170992U, // <7,3,0,4>: Cost 3 vsldoi12 RHS, <3,0,4,1>
+ 3852912829U, // <7,3,0,5>: Cost 4 vsldoi12 RHS, <3,0,5,5>
+ 2657948520U, // <7,3,0,6>: Cost 3 vsldoi4 <6,7,3,0>, <6,7,3,0>
+ 2316060602U, // <7,3,0,7>: Cost 3 vmrglw <5,6,7,0>, <2,6,3,7>
+ 1705429205U, // <7,3,0,u>: Cost 2 vsldoi12 RHS, <3,0,u,2>
+ 3852912860U, // <7,3,1,0>: Cost 4 vsldoi12 RHS, <3,1,0,0>
+ 2779171046U, // <7,3,1,1>: Cost 3 vsldoi12 RHS, <3,1,1,1>
+ 2779171057U, // <7,3,1,2>: Cost 3 vsldoi12 RHS, <3,1,2,3>
+ 3852912887U, // <7,3,1,3>: Cost 4 vsldoi12 RHS, <3,1,3,0>
+ 3852912896U, // <7,3,1,4>: Cost 4 vsldoi12 RHS, <3,1,4,0>
+ 3852912905U, // <7,3,1,5>: Cost 4 vsldoi12 RHS, <3,1,5,0>
+ 3835291923U, // <7,3,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <3,1,6,1>
+ 3841411356U, // <7,3,1,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,1,7,1>
+ 2779171111U, // <7,3,1,u>: Cost 3 vsldoi12 RHS, <3,1,u,3>
+ 2779171120U, // <7,3,2,0>: Cost 3 vsldoi12 RHS, <3,2,0,3>
+ 3852912952U, // <7,3,2,1>: Cost 4 vsldoi12 RHS, <3,2,1,2>
+ 2779171137U, // <7,3,2,2>: Cost 3 vsldoi12 RHS, <3,2,2,2>
+ 2779171144U, // <7,3,2,3>: Cost 3 vsldoi12 RHS, <3,2,3,0>
+ 2779171156U, // <7,3,2,4>: Cost 3 vsldoi12 RHS, <3,2,4,3>
+ 3852912989U, // <7,3,2,5>: Cost 4 vsldoi12 RHS, <3,2,5,3>
+ 2767669606U, // <7,3,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,6,3>
+ 2767669615U, // <7,3,2,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,2,7,3>
+ 2779171189U, // <7,3,2,u>: Cost 3 vsldoi12 RHS, <3,2,u,0>
+ 2779171198U, // <7,3,3,0>: Cost 3 vsldoi12 RHS, <3,3,0,0>
+ 3852913032U, // <7,3,3,1>: Cost 4 vsldoi12 RHS, <3,3,1,1>
+ 2704140655U, // <7,3,3,2>: Cost 3 vsldoi8 <3,2,7,3>, <3,2,7,3>
+ 1705429404U, // <7,3,3,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171238U, // <7,3,3,4>: Cost 3 vsldoi12 RHS, <3,3,4,4>
+ 3852913070U, // <7,3,3,5>: Cost 4 vsldoi12 RHS, <3,3,5,3>
+ 2657973099U, // <7,3,3,6>: Cost 3 vsldoi4 <6,7,3,3>, <6,7,3,3>
+ 2767669700U, // <7,3,3,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,3,7,7>
+ 1705429404U, // <7,3,3,u>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2779171280U, // <7,3,4,0>: Cost 3 vsldoi12 RHS, <3,4,0,1>
+ 2779171290U, // <7,3,4,1>: Cost 3 vsldoi12 RHS, <3,4,1,2>
+ 2634090504U, // <7,3,4,2>: Cost 3 vsldoi4 <2,7,3,4>, <2,7,3,4>
+ 2779171311U, // <7,3,4,3>: Cost 3 vsldoi12 RHS, <3,4,3,5>
+ 2779171319U, // <7,3,4,4>: Cost 3 vsldoi12 RHS, <3,4,4,4>
+ 1705429506U, // <7,3,4,5>: Cost 2 vsldoi12 RHS, <3,4,5,6>
+ 2722057593U, // <7,3,4,6>: Cost 3 vsldoi8 <6,2,7,3>, <4,6,5,2>
+ 2316093370U, // <7,3,4,7>: Cost 3 vmrglw <5,6,7,4>, <2,6,3,7>
+ 1705429533U, // <7,3,4,u>: Cost 2 vsldoi12 RHS, <3,4,u,6>
+ 3852913185U, // <7,3,5,0>: Cost 4 vsldoi12 RHS, <3,5,0,1>
+ 3795799695U, // <7,3,5,1>: Cost 4 vsldoi8 <6,2,7,3>, <5,1,0,1>
+ 3852913203U, // <7,3,5,2>: Cost 4 vsldoi12 RHS, <3,5,2,1>
+ 3852913214U, // <7,3,5,3>: Cost 4 vsldoi12 RHS, <3,5,3,3>
+ 3852913225U, // <7,3,5,4>: Cost 4 vsldoi12 RHS, <3,5,4,5>
+ 2779171410U, // <7,3,5,5>: Cost 3 vsldoi12 RHS, <3,5,5,5>
+ 2718740581U, // <7,3,5,6>: Cost 3 vsldoi8 <5,6,7,3>, <5,6,7,3>
+ 3841411685U, // <7,3,5,7>: Cost 4 vsldoi12 <2,6,3,7>, <3,5,7,6>
+ 2720067847U, // <7,3,5,u>: Cost 3 vsldoi8 <5,u,7,3>, <5,u,7,3>
+ 2773420664U, // <7,3,6,0>: Cost 3 vsldoi12 <3,6,0,7>, <3,6,0,7>
+ 3847236225U, // <7,3,6,1>: Cost 4 vsldoi12 <3,6,1,7>, <3,6,1,7>
+ 1648316922U, // <7,3,6,2>: Cost 2 vsldoi8 <6,2,7,3>, <6,2,7,3>
+ 2773641875U, // <7,3,6,3>: Cost 3 vsldoi12 <3,6,3,7>, <3,6,3,7>
+ 2773715612U, // <7,3,6,4>: Cost 3 vsldoi12 <3,6,4,7>, <3,6,4,7>
+ 3847531173U, // <7,3,6,5>: Cost 4 vsldoi12 <3,6,5,7>, <3,6,5,7>
+ 2722059024U, // <7,3,6,6>: Cost 3 vsldoi8 <6,2,7,3>, <6,6,2,2>
+ 2767669943U, // <7,3,6,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,6,7,7>
+ 1652298720U, // <7,3,6,u>: Cost 2 vsldoi8 <6,u,7,3>, <6,u,7,3>
+ 2767669955U, // <7,3,7,0>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,0,1>
+ 3841411788U, // <7,3,7,1>: Cost 4 vsldoi12 <2,6,3,7>, <3,7,1,1>
+ 2767669978U, // <7,3,7,2>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,2,6>
+ 2722059546U, // <7,3,7,3>: Cost 3 vsldoi8 <6,2,7,3>, <7,3,6,2>
+ 2767669995U, // <7,3,7,4>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,4,5>
+ 3852913396U, // <7,3,7,5>: Cost 4 vsldoi12 RHS, <3,7,5,5>
+ 2722059758U, // <7,3,7,6>: Cost 3 vsldoi8 <6,2,7,3>, <7,6,2,7>
+ 2302183354U, // <7,3,7,7>: Cost 3 vmrglw <3,3,7,7>, <2,6,3,7>
+ 2767670027U, // <7,3,7,u>: Cost 3 vsldoi12 <2,6,3,7>, <3,7,u,1>
+ 2774747930U, // <7,3,u,0>: Cost 3 vsldoi12 <3,u,0,7>, <3,u,0,7>
+ 1705429790U, // <7,3,u,1>: Cost 2 vsldoi12 RHS, <3,u,1,2>
+ 1660262316U, // <7,3,u,2>: Cost 2 vsldoi8 <u,2,7,3>, <u,2,7,3>
+ 1705429404U, // <7,3,u,3>: Cost 2 vsldoi12 RHS, <3,3,3,3>
+ 2775042878U, // <7,3,u,4>: Cost 3 vsldoi12 <3,u,4,7>, <3,u,4,7>
+ 1705429830U, // <7,3,u,5>: Cost 2 vsldoi12 RHS, <3,u,5,6>
+ 2779171660U, // <7,3,u,6>: Cost 3 vsldoi12 RHS, <3,u,6,3>
+ 2767670101U, // <7,3,u,7>: Cost 3 vsldoi12 <2,6,3,7>, <3,u,7,3>
+ 1705429853U, // <7,3,u,u>: Cost 2 vsldoi12 RHS, <3,u,u,2>
+ 2718744576U, // <7,4,0,0>: Cost 3 vsldoi8 <5,6,7,4>, <0,0,0,0>
+ 1645002854U, // <7,4,0,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 3852913527U, // <7,4,0,2>: Cost 4 vsldoi12 RHS, <4,0,2,1>
+ 3852913536U, // <7,4,0,3>: Cost 4 vsldoi12 RHS, <4,0,3,1>
+ 2316061904U, // <7,4,0,4>: Cost 3 vmrglw <5,6,7,0>, <4,4,4,4>
+ 1705429906U, // <7,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658022257U, // <7,4,0,6>: Cost 3 vsldoi4 <6,7,4,0>, <6,7,4,0>
+ 2256489928U, // <7,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1707420589U, // <7,4,0,u>: Cost 2 vsldoi12 RHS, <4,0,u,1>
+ 3852913590U, // <7,4,1,0>: Cost 4 vsldoi12 RHS, <4,1,0,1>
+ 2718745396U, // <7,4,1,1>: Cost 3 vsldoi8 <5,6,7,4>, <1,1,1,1>
+ 2779171786U, // <7,4,1,2>: Cost 3 vsldoi12 RHS, <4,1,2,3>
+ 3852913616U, // <7,4,1,3>: Cost 4 vsldoi12 RHS, <4,1,3,0>
+ 3852913627U, // <7,4,1,4>: Cost 4 vsldoi12 RHS, <4,1,4,2>
+ 2779171810U, // <7,4,1,5>: Cost 3 vsldoi12 RHS, <4,1,5,0>
+ 3792487631U, // <7,4,1,6>: Cost 4 vsldoi8 <5,6,7,4>, <1,6,1,7>
+ 3394456220U, // <7,4,1,7>: Cost 4 vmrglw <6,4,7,1>, <3,6,4,7>
+ 2779171837U, // <7,4,1,u>: Cost 3 vsldoi12 RHS, <4,1,u,0>
+ 3852913673U, // <7,4,2,0>: Cost 4 vsldoi12 RHS, <4,2,0,3>
+ 3852913682U, // <7,4,2,1>: Cost 4 vsldoi12 RHS, <4,2,1,3>
+ 2718746216U, // <7,4,2,2>: Cost 3 vsldoi8 <5,6,7,4>, <2,2,2,2>
+ 2718746278U, // <7,4,2,3>: Cost 3 vsldoi8 <5,6,7,4>, <2,3,0,1>
+ 2779171885U, // <7,4,2,4>: Cost 3 vsldoi12 RHS, <4,2,4,3>
+ 2779171893U, // <7,4,2,5>: Cost 3 vsldoi12 RHS, <4,2,5,2>
+ 2718746554U, // <7,4,2,6>: Cost 3 vsldoi8 <5,6,7,4>, <2,6,3,7>
+ 3847457864U, // <7,4,2,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,2,7,3>
+ 2779171921U, // <7,4,2,u>: Cost 3 vsldoi12 RHS, <4,2,u,3>
+ 2718746774U, // <7,4,3,0>: Cost 3 vsldoi8 <5,6,7,4>, <3,0,1,2>
+ 3852913762U, // <7,4,3,1>: Cost 4 vsldoi12 RHS, <4,3,1,2>
+ 3852913772U, // <7,4,3,2>: Cost 4 vsldoi12 RHS, <4,3,2,3>
+ 2718747036U, // <7,4,3,3>: Cost 3 vsldoi8 <5,6,7,4>, <3,3,3,3>
+ 2718747138U, // <7,4,3,4>: Cost 3 vsldoi8 <5,6,7,4>, <3,4,5,6>
+ 2779171972U, // <7,4,3,5>: Cost 3 vsldoi12 RHS, <4,3,5,0>
+ 2706803380U, // <7,4,3,6>: Cost 3 vsldoi8 <3,6,7,4>, <3,6,7,4>
+ 3847457946U, // <7,4,3,7>: Cost 4 vsldoi12 <3,6,4,7>, <4,3,7,4>
+ 2781162655U, // <7,4,3,u>: Cost 3 vsldoi12 RHS, <4,3,u,0>
+ 2718747538U, // <7,4,4,0>: Cost 3 vsldoi8 <5,6,7,4>, <4,0,5,1>
+ 3852913842U, // <7,4,4,1>: Cost 4 vsldoi12 RHS, <4,4,1,1>
+ 3852913852U, // <7,4,4,2>: Cost 4 vsldoi12 RHS, <4,4,2,2>
+ 2316096696U, // <7,4,4,3>: Cost 3 vmrglw <5,6,7,4>, <7,2,4,3>
+ 1705430224U, // <7,4,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705430234U, // <7,4,4,5>: Cost 2 vsldoi12 RHS, <4,4,5,5>
+ 2658055029U, // <7,4,4,6>: Cost 3 vsldoi4 <6,7,4,4>, <6,7,4,4>
+ 2316097024U, // <7,4,4,7>: Cost 3 vmrglw <5,6,7,4>, <7,6,4,7>
+ 1707420917U, // <7,4,4,u>: Cost 2 vsldoi12 RHS, <4,4,u,5>
+ 1584316518U, // <7,4,5,0>: Cost 2 vsldoi4 <6,7,4,5>, LHS
+ 2658059060U, // <7,4,5,1>: Cost 3 vsldoi4 <6,7,4,5>, <1,1,1,1>
+ 2640144314U, // <7,4,5,2>: Cost 3 vsldoi4 <3,7,4,5>, <2,6,3,7>
+ 2640145131U, // <7,4,5,3>: Cost 3 vsldoi4 <3,7,4,5>, <3,7,4,5>
+ 1584319798U, // <7,4,5,4>: Cost 2 vsldoi4 <6,7,4,5>, RHS
+ 2779172134U, // <7,4,5,5>: Cost 3 vsldoi12 RHS, <4,5,5,0>
+ 631688502U, // <7,4,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2658063354U, // <7,4,5,7>: Cost 3 vsldoi4 <6,7,4,5>, <7,0,1,2>
+ 631688520U, // <7,4,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 3852914001U, // <7,4,6,0>: Cost 4 vsldoi12 RHS, <4,6,0,7>
+ 3852914010U, // <7,4,6,1>: Cost 4 vsldoi12 RHS, <4,6,1,7>
+ 2718749178U, // <7,4,6,2>: Cost 3 vsldoi8 <5,6,7,4>, <6,2,7,3>
+ 2722730572U, // <7,4,6,3>: Cost 3 vsldoi8 <6,3,7,4>, <6,3,7,4>
+ 2723394205U, // <7,4,6,4>: Cost 3 vsldoi8 <6,4,7,4>, <6,4,7,4>
+ 2779172221U, // <7,4,6,5>: Cost 3 vsldoi12 RHS, <4,6,5,6>
+ 2718749496U, // <7,4,6,6>: Cost 3 vsldoi8 <5,6,7,4>, <6,6,6,6>
+ 2718749518U, // <7,4,6,7>: Cost 3 vsldoi8 <5,6,7,4>, <6,7,0,1>
+ 2779172249U, // <7,4,6,u>: Cost 3 vsldoi12 RHS, <4,6,u,7>
+ 2718749690U, // <7,4,7,0>: Cost 3 vsldoi8 <5,6,7,4>, <7,0,1,2>
+ 3847458214U, // <7,4,7,1>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,1,2>
+ 2718749880U, // <7,4,7,2>: Cost 3 vsldoi8 <5,6,7,4>, <7,2,4,3>
+ 3847458236U, // <7,4,7,3>: Cost 4 vsldoi12 <3,6,4,7>, <4,7,3,6>
+ 2718750004U, // <7,4,7,4>: Cost 3 vsldoi8 <5,6,7,4>, <7,4,0,1>
+ 1187876150U, // <7,4,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2718750208U, // <7,4,7,6>: Cost 3 vsldoi8 <5,6,7,4>, <7,6,4,7>
+ 2718750286U, // <7,4,7,7>: Cost 3 vsldoi8 <5,6,7,4>, <7,7,4,4>
+ 1187876393U, // <7,4,7,u>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 1584341094U, // <7,4,u,0>: Cost 2 vsldoi4 <6,7,4,u>, LHS
+ 1645008686U, // <7,4,u,1>: Cost 2 vsldoi8 <5,6,7,4>, LHS
+ 2640168890U, // <7,4,u,2>: Cost 3 vsldoi4 <3,7,4,u>, <2,6,3,7>
+ 2640169710U, // <7,4,u,3>: Cost 3 vsldoi4 <3,7,4,u>, <3,7,4,u>
+ 1584344374U, // <7,4,u,4>: Cost 2 vsldoi4 <6,7,4,u>, RHS
+ 1705430554U, // <7,4,u,5>: Cost 2 vsldoi12 RHS, <4,u,5,1>
+ 631688745U, // <7,4,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 2718750976U, // <7,4,u,7>: Cost 3 vsldoi8 <5,6,7,4>, <u,7,0,1>
+ 631688763U, // <7,4,u,u>: Cost 1 vsldoi12 RHS, RHS
+ 2646147174U, // <7,5,0,0>: Cost 3 vsldoi4 <4,7,5,0>, LHS
+ 2779172424U, // <7,5,0,1>: Cost 3 vsldoi12 RHS, <5,0,1,2>
+ 3852914258U, // <7,5,0,2>: Cost 4 vsldoi12 RHS, <5,0,2,3>
+ 3852914268U, // <7,5,0,3>: Cost 4 vsldoi12 RHS, <5,0,3,4>
+ 2779172450U, // <7,5,0,4>: Cost 3 vsldoi12 RHS, <5,0,4,1>
+ 2316061914U, // <7,5,0,5>: Cost 3 vmrglw <5,6,7,0>, <4,4,5,5>
+ 2316061186U, // <7,5,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,5,6>
+ 2646152186U, // <7,5,0,7>: Cost 3 vsldoi4 <4,7,5,0>, <7,0,1,2>
+ 2779172486U, // <7,5,0,u>: Cost 3 vsldoi12 RHS, <5,0,u,1>
+ 2781163151U, // <7,5,1,0>: Cost 3 vsldoi12 RHS, <5,1,0,1>
+ 2321378194U, // <7,5,1,1>: Cost 3 vmrglw <6,5,7,1>, <4,0,5,1>
+ 3852914339U, // <7,5,1,2>: Cost 4 vsldoi12 RHS, <5,1,2,3>
+ 3852914350U, // <7,5,1,3>: Cost 4 vsldoi12 RHS, <5,1,3,5>
+ 2781163191U, // <7,5,1,4>: Cost 3 vsldoi12 RHS, <5,1,4,5>
+ 3852914363U, // <7,5,1,5>: Cost 4 vsldoi12 RHS, <5,1,5,0>
+ 3835588297U, // <7,5,1,6>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,6,5>
+ 3835588306U, // <7,5,1,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,1,7,5>
+ 2781163223U, // <7,5,1,u>: Cost 3 vsldoi12 RHS, <5,1,u,1>
+ 3852914400U, // <7,5,2,0>: Cost 4 vsldoi12 RHS, <5,2,0,1>
+ 2781163243U, // <7,5,2,1>: Cost 3 vsldoi12 RHS, <5,2,1,3>
+ 3852914419U, // <7,5,2,2>: Cost 4 vsldoi12 RHS, <5,2,2,2>
+ 2779172606U, // <7,5,2,3>: Cost 3 vsldoi12 RHS, <5,2,3,4>
+ 3780552497U, // <7,5,2,4>: Cost 4 vsldoi8 <3,6,7,5>, <2,4,6,5>
+ 2781163279U, // <7,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2779172632U, // <7,5,2,6>: Cost 3 vsldoi12 RHS, <5,2,6,3>
+ 3835588385U, // <7,5,2,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,2,7,3>
+ 2779172650U, // <7,5,2,u>: Cost 3 vsldoi12 RHS, <5,2,u,3>
+ 3852914481U, // <7,5,3,0>: Cost 4 vsldoi12 RHS, <5,3,0,1>
+ 2319403922U, // <7,5,3,1>: Cost 3 vmrglw <6,2,7,3>, <4,0,5,1>
+ 2319404409U, // <7,5,3,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 3852914510U, // <7,5,3,3>: Cost 4 vsldoi12 RHS, <5,3,3,3>
+ 3779226131U, // <7,5,3,4>: Cost 4 vsldoi8 <3,4,7,5>, <3,4,7,5>
+ 2319404250U, // <7,5,3,5>: Cost 3 vmrglw <6,2,7,3>, <4,4,5,5>
+ 2319403522U, // <7,5,3,6>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,6>
+ 3852914547U, // <7,5,3,7>: Cost 4 vsldoi12 RHS, <5,3,7,4>
+ 2319403524U, // <7,5,3,u>: Cost 3 vmrglw <6,2,7,3>, <3,4,5,u>
+ 2646179942U, // <7,5,4,0>: Cost 3 vsldoi4 <4,7,5,4>, LHS
+ 2316094354U, // <7,5,4,1>: Cost 3 vmrglw <5,6,7,4>, <4,0,5,1>
+ 3852914582U, // <7,5,4,2>: Cost 4 vsldoi12 RHS, <5,4,2,3>
+ 3852914592U, // <7,5,4,3>: Cost 4 vsldoi12 RHS, <5,4,3,4>
+ 2646183372U, // <7,5,4,4>: Cost 3 vsldoi4 <4,7,5,4>, <4,7,5,4>
+ 2779172788U, // <7,5,4,5>: Cost 3 vsldoi12 RHS, <5,4,5,6>
+ 2316093954U, // <7,5,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,5,6>
+ 2646185318U, // <7,5,4,7>: Cost 3 vsldoi4 <4,7,5,4>, <7,4,5,6>
+ 2779172815U, // <7,5,4,u>: Cost 3 vsldoi12 RHS, <5,4,u,6>
+ 2781163475U, // <7,5,5,0>: Cost 3 vsldoi12 RHS, <5,5,0,1>
+ 2781163484U, // <7,5,5,1>: Cost 3 vsldoi12 RHS, <5,5,1,1>
+ 3852914662U, // <7,5,5,2>: Cost 4 vsldoi12 RHS, <5,5,2,2>
+ 3852914672U, // <7,5,5,3>: Cost 4 vsldoi12 RHS, <5,5,3,3>
+ 2781163515U, // <7,5,5,4>: Cost 3 vsldoi12 RHS, <5,5,4,5>
+ 1705431044U, // <7,5,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172878U, // <7,5,5,6>: Cost 3 vsldoi12 RHS, <5,5,6,6>
+ 3835588632U, // <7,5,5,7>: Cost 4 vsldoi12 <1,6,5,7>, <5,5,7,7>
+ 1705431044U, // <7,5,5,u>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779172900U, // <7,5,6,0>: Cost 3 vsldoi12 RHS, <5,6,0,1>
+ 2781163571U, // <7,5,6,1>: Cost 3 vsldoi12 RHS, <5,6,1,7>
+ 3852914743U, // <7,5,6,2>: Cost 4 vsldoi12 RHS, <5,6,2,2>
+ 2779172930U, // <7,5,6,3>: Cost 3 vsldoi12 RHS, <5,6,3,4>
+ 2779172940U, // <7,5,6,4>: Cost 3 vsldoi12 RHS, <5,6,4,5>
+ 2781163607U, // <7,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 2779172960U, // <7,5,6,6>: Cost 3 vsldoi12 RHS, <5,6,6,7>
+ 1705431138U, // <7,5,6,7>: Cost 2 vsldoi12 RHS, <5,6,7,0>
+ 1705578603U, // <7,5,6,u>: Cost 2 vsldoi12 RHS, <5,6,u,0>
+ 2646204518U, // <7,5,7,0>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2322090898U, // <7,5,7,1>: Cost 3 vmrglw <6,6,7,7>, <4,0,5,1>
+ 3719947880U, // <7,5,7,2>: Cost 4 vsldoi4 <4,7,5,7>, <2,2,2,2>
+ 3719948438U, // <7,5,7,3>: Cost 4 vsldoi4 <4,7,5,7>, <3,0,1,2>
+ 2646207951U, // <7,5,7,4>: Cost 3 vsldoi4 <4,7,5,7>, <4,7,5,7>
+ 2322091226U, // <7,5,7,5>: Cost 3 vmrglw <6,6,7,7>, <4,4,5,5>
+ 2322090498U, // <7,5,7,6>: Cost 3 vmrglw <6,6,7,7>, <3,4,5,6>
+ 2646210156U, // <7,5,7,7>: Cost 3 vsldoi4 <4,7,5,7>, <7,7,7,7>
+ 2646210350U, // <7,5,7,u>: Cost 3 vsldoi4 <4,7,5,7>, LHS
+ 2779173062U, // <7,5,u,0>: Cost 3 vsldoi12 RHS, <5,u,0,1>
+ 2779173072U, // <7,5,u,1>: Cost 3 vsldoi12 RHS, <5,u,1,2>
+ 2319404409U, // <7,5,u,2>: Cost 3 vmrglw <6,2,7,3>, <4,6,5,2>
+ 2779173092U, // <7,5,u,3>: Cost 3 vsldoi12 RHS, <5,u,3,4>
+ 2779173101U, // <7,5,u,4>: Cost 3 vsldoi12 RHS, <5,u,4,4>
+ 1705431044U, // <7,5,u,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 2779173118U, // <7,5,u,6>: Cost 3 vsldoi12 RHS, <5,u,6,3>
+ 1705578756U, // <7,5,u,7>: Cost 2 vsldoi12 RHS, <5,u,7,0>
+ 1707421965U, // <7,5,u,u>: Cost 2 vsldoi12 RHS, <5,u,u,0>
+ 3852914966U, // <7,6,0,0>: Cost 4 vsldoi12 RHS, <6,0,0,0>
+ 2779173153U, // <7,6,0,1>: Cost 3 vsldoi12 RHS, <6,0,1,2>
+ 2256491002U, // <7,6,0,2>: Cost 3 vmrghw <7,0,1,2>, <6,2,7,3>
+ 3852914994U, // <7,6,0,3>: Cost 4 vsldoi12 RHS, <6,0,3,1>
+ 3852915003U, // <7,6,0,4>: Cost 4 vsldoi12 RHS, <6,0,4,1>
+ 2316062652U, // <7,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316063544U, // <7,6,0,6>: Cost 3 vmrglw <5,6,7,0>, <6,6,6,6>
+ 1242320182U, // <7,6,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1242320183U, // <7,6,0,u>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 3852915048U, // <7,6,1,0>: Cost 4 vsldoi12 RHS, <6,1,0,1>
+ 3377866217U, // <7,6,1,1>: Cost 4 vmrglw <3,6,7,1>, <2,0,6,1>
+ 3852915068U, // <7,6,1,2>: Cost 4 vsldoi12 RHS, <6,1,2,3>
+ 3833672072U, // <7,6,1,3>: Cost 5 vsldoi12 <1,3,6,7>, <6,1,3,6>
+ 3852915088U, // <7,6,1,4>: Cost 4 vsldoi12 RHS, <6,1,4,5>
+ 3395122056U, // <7,6,1,5>: Cost 4 vmrglw <6,5,7,1>, <6,7,6,5>
+ 3389813560U, // <7,6,1,6>: Cost 4 vmrglw <5,6,7,1>, <6,6,6,6>
+ 2779173287U, // <7,6,1,7>: Cost 3 vsldoi12 RHS, <6,1,7,1>
+ 2779320752U, // <7,6,1,u>: Cost 3 vsldoi12 RHS, <6,1,u,1>
+ 2658181222U, // <7,6,2,0>: Cost 3 vsldoi4 <6,7,6,2>, LHS
+ 3852915140U, // <7,6,2,1>: Cost 4 vsldoi12 RHS, <6,2,1,3>
+ 2257973754U, // <7,6,2,2>: Cost 3 vmrghw <7,2,3,3>, <6,2,7,3>
+ 3841413589U, // <7,6,2,3>: Cost 4 vsldoi12 <2,6,3,7>, <6,2,3,2>
+ 2658184502U, // <7,6,2,4>: Cost 3 vsldoi4 <6,7,6,2>, RHS
+ 3852915176U, // <7,6,2,5>: Cost 4 vsldoi12 RHS, <6,2,5,3>
+ 2658186117U, // <7,6,2,6>: Cost 3 vsldoi4 <6,7,6,2>, <6,7,6,2>
+ 1705431546U, // <7,6,2,7>: Cost 2 vsldoi12 RHS, <6,2,7,3>
+ 1705579011U, // <7,6,2,u>: Cost 2 vsldoi12 RHS, <6,2,u,3>
+ 3714015334U, // <7,6,3,0>: Cost 4 vsldoi4 <3,7,6,3>, LHS
+ 3777243425U, // <7,6,3,1>: Cost 4 vsldoi8 <3,1,7,6>, <3,1,7,6>
+ 2319405957U, // <7,6,3,2>: Cost 3 vmrglw <6,2,7,3>, <6,7,6,2>
+ 3375229286U, // <7,6,3,3>: Cost 4 vmrglw <3,2,7,3>, <3,2,6,3>
+ 2779173426U, // <7,6,3,4>: Cost 3 vsldoi12 RHS, <6,3,4,5>
+ 3375228721U, // <7,6,3,5>: Cost 4 vmrglw <3,2,7,3>, <2,4,6,5>
+ 2319405880U, // <7,6,3,6>: Cost 3 vmrglw <6,2,7,3>, <6,6,6,6>
+ 1245662518U, // <7,6,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1245662519U, // <7,6,3,u>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 3852915291U, // <7,6,4,0>: Cost 4 vsldoi12 RHS, <6,4,0,1>
+ 3389834729U, // <7,6,4,1>: Cost 4 vmrglw <5,6,7,4>, <2,0,6,1>
+ 2259472890U, // <7,6,4,2>: Cost 3 vmrghw <7,4,5,6>, <6,2,7,3>
+ 3852915321U, // <7,6,4,3>: Cost 4 vsldoi12 RHS, <6,4,3,4>
+ 3852915330U, // <7,6,4,4>: Cost 4 vsldoi12 RHS, <6,4,4,4>
+ 2779173517U, // <7,6,4,5>: Cost 3 vsldoi12 RHS, <6,4,5,6>
+ 2316096312U, // <7,6,4,6>: Cost 3 vmrglw <5,6,7,4>, <6,6,6,6>
+ 1242352950U, // <7,6,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1242352951U, // <7,6,4,u>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 3852915372U, // <7,6,5,0>: Cost 4 vsldoi12 RHS, <6,5,0,1>
+ 3835294392U, // <7,6,5,1>: Cost 5 vsldoi12 <1,6,1,7>, <6,5,1,4>
+ 3852915395U, // <7,6,5,2>: Cost 4 vsldoi12 RHS, <6,5,2,6>
+ 3852915404U, // <7,6,5,3>: Cost 4 vsldoi12 RHS, <6,5,3,6>
+ 3852915412U, // <7,6,5,4>: Cost 4 vsldoi12 RHS, <6,5,4,5>
+ 3377899313U, // <7,6,5,5>: Cost 4 vmrglw <3,6,7,5>, <2,4,6,5>
+ 2718765160U, // <7,6,5,6>: Cost 3 vsldoi8 <5,6,7,6>, <5,6,7,6>
+ 2779173611U, // <7,6,5,7>: Cost 3 vsldoi12 RHS, <6,5,7,1>
+ 2779321076U, // <7,6,5,u>: Cost 3 vsldoi12 RHS, <6,5,u,1>
+ 2658213990U, // <7,6,6,0>: Cost 3 vsldoi4 <6,7,6,6>, LHS
+ 3852915462U, // <7,6,6,1>: Cost 4 vsldoi12 RHS, <6,6,1,1>
+ 2718765562U, // <7,6,6,2>: Cost 3 vsldoi8 <5,6,7,6>, <6,2,7,3>
+ 3714042622U, // <7,6,6,3>: Cost 4 vsldoi4 <3,7,6,6>, <3,7,6,6>
+ 2658217270U, // <7,6,6,4>: Cost 3 vsldoi4 <6,7,6,6>, RHS
+ 2724074224U, // <7,6,6,5>: Cost 3 vsldoi8 <6,5,7,6>, <6,5,7,6>
+ 1705431864U, // <7,6,6,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705431874U, // <7,6,6,7>: Cost 2 vsldoi12 RHS, <6,6,7,7>
+ 1705579339U, // <7,6,6,u>: Cost 2 vsldoi12 RHS, <6,6,u,7>
+ 1705431886U, // <7,6,7,0>: Cost 2 vsldoi12 RHS, <6,7,0,1>
+ 2779173719U, // <7,6,7,1>: Cost 3 vsldoi12 RHS, <6,7,1,1>
+ 2779173729U, // <7,6,7,2>: Cost 3 vsldoi12 RHS, <6,7,2,2>
+ 2779173736U, // <7,6,7,3>: Cost 3 vsldoi12 RHS, <6,7,3,0>
+ 1705431926U, // <7,6,7,4>: Cost 2 vsldoi12 RHS, <6,7,4,5>
+ 2779173759U, // <7,6,7,5>: Cost 3 vsldoi12 RHS, <6,7,5,5>
+ 2779173765U, // <7,6,7,6>: Cost 3 vsldoi12 RHS, <6,7,6,2>
+ 1248349494U, // <7,6,7,7>: Cost 2 vmrglw <6,6,7,7>, RHS
+ 1705431958U, // <7,6,7,u>: Cost 2 vsldoi12 RHS, <6,7,u,1>
+ 1705579423U, // <7,6,u,0>: Cost 2 vsldoi12 RHS, <6,u,0,1>
+ 2779173801U, // <7,6,u,1>: Cost 3 vsldoi12 RHS, <6,u,1,2>
+ 2779321266U, // <7,6,u,2>: Cost 3 vsldoi12 RHS, <6,u,2,2>
+ 2779321273U, // <7,6,u,3>: Cost 3 vsldoi12 RHS, <6,u,3,0>
+ 1705579463U, // <7,6,u,4>: Cost 2 vsldoi12 RHS, <6,u,4,5>
+ 2779173841U, // <7,6,u,5>: Cost 3 vsldoi12 RHS, <6,u,5,6>
+ 1705431864U, // <7,6,u,6>: Cost 2 vsldoi12 RHS, <6,6,6,6>
+ 1705432032U, // <7,6,u,7>: Cost 2 vsldoi12 RHS, <6,u,7,3>
+ 1705579495U, // <7,6,u,u>: Cost 2 vsldoi12 RHS, <6,u,u,1>
+ 1242320994U, // <7,7,0,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705432058U, // <7,7,0,1>: Cost 2 vsldoi12 RHS, <7,0,1,2>
+ 3841414146U, // <7,7,0,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,0,2,1>
+ 2316063226U, // <7,7,0,3>: Cost 3 vmrglw <5,6,7,0>, <6,2,7,3>
+ 2779173908U, // <7,7,0,4>: Cost 3 vsldoi12 RHS, <7,0,4,1>
+ 2658242658U, // <7,7,0,5>: Cost 3 vsldoi4 <6,7,7,0>, <5,6,7,0>
+ 2658243468U, // <7,7,0,6>: Cost 3 vsldoi4 <6,7,7,0>, <6,7,7,0>
+ 2316063554U, // <7,7,0,7>: Cost 3 vmrglw <5,6,7,0>, <6,6,7,7>
+ 1705432121U, // <7,7,0,u>: Cost 2 vsldoi12 RHS, <7,0,u,2>
+ 3852915777U, // <7,7,1,0>: Cost 4 vsldoi12 RHS, <7,1,0,1>
+ 2779173962U, // <7,7,1,1>: Cost 3 vsldoi12 RHS, <7,1,1,1>
+ 2779173973U, // <7,7,1,2>: Cost 3 vsldoi12 RHS, <7,1,2,3>
+ 3389813242U, // <7,7,1,3>: Cost 4 vmrglw <5,6,7,1>, <6,2,7,3>
+ 3852915813U, // <7,7,1,4>: Cost 4 vsldoi12 RHS, <7,1,4,1>
+ 3852915821U, // <7,7,1,5>: Cost 4 vsldoi12 RHS, <7,1,5,0>
+ 3835294839U, // <7,7,1,6>: Cost 4 vsldoi12 <1,6,1,7>, <7,1,6,1>
+ 2329343596U, // <7,7,1,7>: Cost 3 vmrglw <7,u,7,1>, <7,7,7,7>
+ 2779174027U, // <7,7,1,u>: Cost 3 vsldoi12 RHS, <7,1,u,3>
+ 2803061908U, // <7,7,2,0>: Cost 3 vsldoi12 RHS, <7,2,0,3>
+ 3852915869U, // <7,7,2,1>: Cost 4 vsldoi12 RHS, <7,2,1,3>
+ 2779174053U, // <7,7,2,2>: Cost 3 vsldoi12 RHS, <7,2,2,2>
+ 2779174060U, // <7,7,2,3>: Cost 3 vsldoi12 RHS, <7,2,3,0>
+ 2803061944U, // <7,7,2,4>: Cost 3 vsldoi12 RHS, <7,2,4,3>
+ 3852915905U, // <7,7,2,5>: Cost 4 vsldoi12 RHS, <7,2,5,3>
+ 2767672522U, // <7,7,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <7,2,6,3>
+ 2791855315U, // <7,7,2,7>: Cost 3 vsldoi12 <6,6,7,7>, <7,2,7,3>
+ 2768999644U, // <7,7,2,u>: Cost 3 vsldoi12 <2,u,3,7>, <7,2,u,3>
+ 2779174115U, // <7,7,3,0>: Cost 3 vsldoi12 RHS, <7,3,0,1>
+ 3852915948U, // <7,7,3,1>: Cost 4 vsldoi12 RHS, <7,3,1,1>
+ 3841414394U, // <7,7,3,2>: Cost 4 vsldoi12 <2,6,3,7>, <7,3,2,6>
+ 1245663738U, // <7,7,3,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174155U, // <7,7,3,4>: Cost 3 vsldoi12 RHS, <7,3,4,5>
+ 3852915988U, // <7,7,3,5>: Cost 4 vsldoi12 RHS, <7,3,5,5>
+ 2706827959U, // <7,7,3,6>: Cost 3 vsldoi8 <3,6,7,7>, <3,6,7,7>
+ 2319405890U, // <7,7,3,7>: Cost 3 vmrglw <6,2,7,3>, <6,6,7,7>
+ 1245663738U, // <7,7,3,u>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 2779174200U, // <7,7,4,0>: Cost 3 vsldoi12 RHS, <7,4,0,5>
+ 3852916030U, // <7,7,4,1>: Cost 4 vsldoi12 RHS, <7,4,1,2>
+ 3714099130U, // <7,7,4,2>: Cost 4 vsldoi4 <3,7,7,4>, <2,6,3,7>
+ 2316095994U, // <7,7,4,3>: Cost 3 vmrglw <5,6,7,4>, <6,2,7,3>
+ 1242353766U, // <7,7,4,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705432422U, // <7,7,4,5>: Cost 2 vsldoi12 RHS, <7,4,5,6>
+ 2658276240U, // <7,7,4,6>: Cost 3 vsldoi4 <6,7,7,4>, <6,7,7,4>
+ 2316096322U, // <7,7,4,7>: Cost 3 vmrglw <5,6,7,4>, <6,6,7,7>
+ 1705432449U, // <7,7,4,u>: Cost 2 vsldoi12 RHS, <7,4,u,6>
+ 3852916101U, // <7,7,5,0>: Cost 4 vsldoi12 RHS, <7,5,0,1>
+ 3854906765U, // <7,7,5,1>: Cost 4 vsldoi12 RHS, <7,5,1,0>
+ 3852916121U, // <7,7,5,2>: Cost 4 vsldoi12 RHS, <7,5,2,3>
+ 3389846010U, // <7,7,5,3>: Cost 4 vmrglw <5,6,7,5>, <6,2,7,3>
+ 3852916141U, // <7,7,5,4>: Cost 4 vsldoi12 RHS, <7,5,4,5>
+ 2779174326U, // <7,7,5,5>: Cost 3 vsldoi12 RHS, <7,5,5,5>
+ 2779174337U, // <7,7,5,6>: Cost 3 vsldoi12 RHS, <7,5,6,7>
+ 2329376364U, // <7,7,5,7>: Cost 3 vmrglw <7,u,7,5>, <7,7,7,7>
+ 2779321811U, // <7,7,5,u>: Cost 3 vsldoi12 RHS, <7,5,u,7>
+ 2658287718U, // <7,7,6,0>: Cost 3 vsldoi4 <6,7,7,6>, LHS
+ 3852916197U, // <7,7,6,1>: Cost 4 vsldoi12 RHS, <7,6,1,7>
+ 2779174382U, // <7,7,6,2>: Cost 3 vsldoi12 RHS, <7,6,2,7>
+ 2316112378U, // <7,7,6,3>: Cost 3 vmrglw <5,6,7,6>, <6,2,7,3>
+ 2658290998U, // <7,7,6,4>: Cost 3 vsldoi4 <6,7,7,6>, RHS
+ 3852916233U, // <7,7,6,5>: Cost 4 vsldoi12 RHS, <7,6,5,7>
+ 1651004226U, // <7,7,6,6>: Cost 2 vsldoi8 <6,6,7,7>, <6,6,7,7>
+ 2779174420U, // <7,7,6,7>: Cost 3 vsldoi12 RHS, <7,6,7,0>
+ 1652331492U, // <7,7,6,u>: Cost 2 vsldoi8 <6,u,7,7>, <6,u,7,7>
+ 1590526054U, // <7,7,7,0>: Cost 2 vsldoi4 <7,7,7,7>, LHS
+ 2328728623U, // <7,7,7,1>: Cost 3 vmrglw <7,7,7,7>, <7,0,7,1>
+ 2724746451U, // <7,7,7,2>: Cost 3 vsldoi8 <6,6,7,7>, <7,2,7,3>
+ 2322092538U, // <7,7,7,3>: Cost 3 vmrglw <6,6,7,7>, <6,2,7,3>
+ 1590529334U, // <7,7,7,4>: Cost 2 vsldoi4 <7,7,7,7>, RHS
+ 2328728951U, // <7,7,7,5>: Cost 3 vmrglw <7,7,7,7>, <7,4,7,5>
+ 2724746770U, // <7,7,7,6>: Cost 3 vsldoi8 <6,6,7,7>, <7,6,6,7>
+ 430361910U, // <7,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,7,u>: Cost 1 vspltisw3 RHS
+ 1242320994U, // <7,7,u,0>: Cost 2 vmrglw <5,6,7,0>, <5,6,7,0>
+ 1705580162U, // <7,7,u,1>: Cost 2 vsldoi12 RHS, <7,u,1,2>
+ 2779321996U, // <7,7,u,2>: Cost 3 vsldoi12 RHS, <7,u,2,3>
+ 1245663738U, // <7,7,u,3>: Cost 2 vmrglw <6,2,7,3>, <6,2,7,3>
+ 1242353766U, // <7,7,u,4>: Cost 2 vmrglw <5,6,7,4>, <5,6,7,4>
+ 1705580202U, // <7,7,u,5>: Cost 2 vsldoi12 RHS, <7,u,5,6>
+ 1662949620U, // <7,7,u,6>: Cost 2 vsldoi8 <u,6,7,7>, <u,6,7,7>
+ 430361910U, // <7,7,u,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,7,u,u>: Cost 1 vspltisw3 RHS
+ 1705426944U, // <7,u,0,0>: Cost 2 vsldoi12 RHS, <0,0,0,0>
+ 1705432787U, // <7,u,0,1>: Cost 2 vsldoi12 RHS, <u,0,1,2>
+ 2316060885U, // <7,u,0,2>: Cost 3 vmrglw <5,6,7,0>, <3,0,u,2>
+ 1242316956U, // <7,u,0,3>: Cost 2 vmrglw <5,6,7,0>, LHS
+ 2779174637U, // <7,u,0,4>: Cost 3 vsldoi12 RHS, <u,0,4,1>
+ 1182750874U, // <7,u,0,5>: Cost 2 vmrghw <7,0,1,2>, RHS
+ 2316061213U, // <7,u,0,6>: Cost 3 vmrglw <5,6,7,0>, <3,4,u,6>
+ 1242320200U, // <7,u,0,7>: Cost 2 vmrglw <5,6,7,0>, RHS
+ 1705432850U, // <7,u,0,u>: Cost 2 vsldoi12 RHS, <u,0,u,2>
+ 1584578662U, // <7,u,1,0>: Cost 2 vsldoi4 <6,7,u,1>, LHS
+ 1705427764U, // <7,u,1,1>: Cost 2 vsldoi12 RHS, <1,1,1,1>
+ 631691054U, // <7,u,1,2>: Cost 1 vsldoi12 RHS, LHS
+ 2640407307U, // <7,u,1,3>: Cost 3 vsldoi4 <3,7,u,1>, <3,7,u,1>
+ 1584581942U, // <7,u,1,4>: Cost 2 vsldoi4 <6,7,u,1>, RHS
+ 2779174726U, // <7,u,1,5>: Cost 3 vsldoi12 RHS, <u,1,5,0>
+ 1584583574U, // <7,u,1,6>: Cost 2 vsldoi4 <6,7,u,1>, <6,7,u,1>
+ 2779322201U, // <7,u,1,7>: Cost 3 vsldoi12 RHS, <u,1,7,1>
+ 631691108U, // <7,u,1,u>: Cost 1 vsldoi12 RHS, LHS
+ 2779174763U, // <7,u,2,0>: Cost 3 vsldoi12 RHS, <u,2,0,1>
+ 2779174774U, // <7,u,2,1>: Cost 3 vsldoi12 RHS, <u,2,1,3>
+ 1705428584U, // <7,u,2,2>: Cost 2 vsldoi12 RHS, <2,2,2,2>
+ 1705432965U, // <7,u,2,3>: Cost 2 vsldoi12 RHS, <u,2,3,0>
+ 2779174801U, // <7,u,2,4>: Cost 3 vsldoi12 RHS, <u,2,4,3>
+ 2779174810U, // <7,u,2,5>: Cost 3 vsldoi12 RHS, <u,2,5,3>
+ 2767673251U, // <7,u,2,6>: Cost 3 vsldoi12 <2,6,3,7>, <u,2,6,3>
+ 1705580460U, // <7,u,2,7>: Cost 2 vsldoi12 RHS, <u,2,7,3>
+ 1705433010U, // <7,u,2,u>: Cost 2 vsldoi12 RHS, <u,2,u,0>
+ 1705433020U, // <7,u,3,0>: Cost 2 vsldoi12 RHS, <u,3,0,1>
+ 2779174853U, // <7,u,3,1>: Cost 3 vsldoi12 RHS, <u,3,1,1>
+ 2767673299U, // <7,u,3,2>: Cost 3 vsldoi12 <2,6,3,7>, <u,3,2,6>
+ 1245659292U, // <7,u,3,3>: Cost 2 vmrglw <6,2,7,3>, LHS
+ 1705433060U, // <7,u,3,4>: Cost 2 vsldoi12 RHS, <u,3,4,5>
+ 2779174893U, // <7,u,3,5>: Cost 3 vsldoi12 RHS, <u,3,5,5>
+ 2706836152U, // <7,u,3,6>: Cost 3 vsldoi8 <3,6,7,u>, <3,6,7,u>
+ 1245662536U, // <7,u,3,7>: Cost 2 vmrglw <6,2,7,3>, RHS
+ 1705433092U, // <7,u,3,u>: Cost 2 vsldoi12 RHS, <u,3,u,1>
+ 2779174925U, // <7,u,4,0>: Cost 3 vsldoi12 RHS, <u,4,0,1>
+ 1185732398U, // <7,u,4,1>: Cost 2 vmrghw <7,4,5,6>, LHS
+ 2316093653U, // <7,u,4,2>: Cost 3 vmrglw <5,6,7,4>, <3,0,u,2>
+ 1242349724U, // <7,u,4,3>: Cost 2 vmrglw <5,6,7,4>, LHS
+ 1705430224U, // <7,u,4,4>: Cost 2 vsldoi12 RHS, <4,4,4,4>
+ 1705433151U, // <7,u,4,5>: Cost 2 vsldoi12 RHS, <u,4,5,6>
+ 2316093981U, // <7,u,4,6>: Cost 3 vmrglw <5,6,7,4>, <3,4,u,6>
+ 1242352968U, // <7,u,4,7>: Cost 2 vmrglw <5,6,7,4>, RHS
+ 1705433178U, // <7,u,4,u>: Cost 2 vsldoi12 RHS, <u,4,u,6>
+ 1584611430U, // <7,u,5,0>: Cost 2 vsldoi4 <6,7,u,5>, LHS
+ 2781165670U, // <7,u,5,1>: Cost 3 vsldoi12 RHS, <u,5,1,0>
+ 2640439226U, // <7,u,5,2>: Cost 3 vsldoi4 <3,7,u,5>, <2,6,3,7>
+ 2640440079U, // <7,u,5,3>: Cost 3 vsldoi4 <3,7,u,5>, <3,7,u,5>
+ 1584614710U, // <7,u,5,4>: Cost 2 vsldoi4 <6,7,u,5>, RHS
+ 1705431044U, // <7,u,5,5>: Cost 2 vsldoi12 RHS, <5,5,5,5>
+ 631691418U, // <7,u,5,6>: Cost 1 vsldoi12 RHS, RHS
+ 2779322525U, // <7,u,5,7>: Cost 3 vsldoi12 RHS, <u,5,7,1>
+ 631691436U, // <7,u,5,u>: Cost 1 vsldoi12 RHS, RHS
+ 2779175087U, // <7,u,6,0>: Cost 3 vsldoi12 RHS, <u,6,0,1>
+ 2779175102U, // <7,u,6,1>: Cost 3 vsldoi12 RHS, <u,6,1,7>
+ 1648357887U, // <7,u,6,2>: Cost 2 vsldoi8 <6,2,7,u>, <6,2,7,u>
+ 1705433296U, // <7,u,6,3>: Cost 2 vsldoi12 RHS, <u,6,3,7>
+ 2779175127U, // <7,u,6,4>: Cost 3 vsldoi12 RHS, <u,6,4,5>
+ 2779175138U, // <7,u,6,5>: Cost 3 vsldoi12 RHS, <u,6,5,7>
+ 1651012419U, // <7,u,6,6>: Cost 2 vsldoi8 <6,6,7,u>, <6,6,7,u>
+ 1705580788U, // <7,u,6,7>: Cost 2 vsldoi12 RHS, <u,6,7,7>
+ 1705433341U, // <7,u,6,u>: Cost 2 vsldoi12 RHS, <u,6,u,7>
+ 1705580800U, // <7,u,7,0>: Cost 2 vsldoi12 RHS, <u,7,0,1>
+ 1187878702U, // <7,u,7,1>: Cost 2 vmrghw <7,7,7,7>, LHS
+ 2768042263U, // <7,u,7,2>: Cost 3 vsldoi12 <2,6,u,7>, <u,7,2,6>
+ 1248346268U, // <7,u,7,3>: Cost 2 vmrglw <6,6,7,7>, LHS
+ 1705580840U, // <7,u,7,4>: Cost 2 vsldoi12 RHS, <u,7,4,5>
+ 1187879066U, // <7,u,7,5>: Cost 2 vmrghw <7,7,7,7>, RHS
+ 2779322679U, // <7,u,7,6>: Cost 3 vsldoi12 RHS, <u,7,6,2>
+ 430361910U, // <7,u,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <7,u,7,u>: Cost 1 vspltisw3 RHS
+ 1705433425U, // <7,u,u,0>: Cost 2 vsldoi12 RHS, <u,u,0,1>
+ 1705433435U, // <7,u,u,1>: Cost 2 vsldoi12 RHS, <u,u,1,2>
+ 631691621U, // <7,u,u,2>: Cost 1 vsldoi12 RHS, LHS
+ 1705433451U, // <7,u,u,3>: Cost 2 vsldoi12 RHS, <u,u,3,0>
+ 1705433465U, // <7,u,u,4>: Cost 2 vsldoi12 RHS, <u,u,4,5>
+ 1705433475U, // <7,u,u,5>: Cost 2 vsldoi12 RHS, <u,u,5,6>
+ 631691661U, // <7,u,u,6>: Cost 1 vsldoi12 RHS, RHS
+ 430361910U, // <7,u,u,7>: Cost 1 vspltisw3 RHS
+ 631691675U, // <7,u,u,u>: Cost 1 vsldoi12 RHS, LHS
+ 202162278U, // <u,0,0,0>: Cost 1 vspltisw0 LHS
+ 1678598154U, // <u,0,0,1>: Cost 2 vsldoi12 LHS, <0,0,1,1>
+ 2634500154U, // <u,0,0,2>: Cost 3 vsldoi4 <2,u,0,0>, <2,u,0,0>
+ 2289596269U, // <u,0,0,3>: Cost 3 vmrglw <1,2,u,0>, <u,2,0,3>
+ 1548815670U, // <u,0,0,4>: Cost 2 vsldoi4 <0,u,0,0>, RHS
+ 2663698530U, // <u,0,0,5>: Cost 3 vsldoi4 <7,7,0,0>, <5,6,7,0>
+ 2658390942U, // <u,0,0,6>: Cost 3 vsldoi4 <6,u,0,0>, <6,u,0,0>
+ 2289596597U, // <u,0,0,7>: Cost 3 vmrglw <1,2,u,0>, <u,6,0,7>
+ 202162278U, // <u,0,0,u>: Cost 1 vspltisw0 LHS
+ 1560764518U, // <u,0,1,0>: Cost 2 vsldoi4 <2,u,0,1>, LHS
+ 115720294U, // <u,0,1,1>: Cost 1 vmrghw LHS, LHS
+ 604856427U, // <u,0,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 2634508438U, // <u,0,1,3>: Cost 3 vsldoi4 <2,u,0,1>, <3,0,1,2>
+ 1560767798U, // <u,0,1,4>: Cost 2 vsldoi4 <2,u,0,1>, RHS
+ 2652426438U, // <u,0,1,5>: Cost 3 vsldoi4 <5,u,0,1>, <5,u,0,1>
+ 1584657311U, // <u,0,1,6>: Cost 2 vsldoi4 <6,u,0,1>, <6,u,0,1>
+ 2658399226U, // <u,0,1,7>: Cost 3 vsldoi4 <6,u,0,1>, <7,0,1,2>
+ 604856476U, // <u,0,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 2696889850U, // <u,0,2,0>: Cost 3 vsldoi8 <2,0,u,0>, <2,0,u,0>
+ 1190174822U, // <u,0,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 2692245096U, // <u,0,2,2>: Cost 3 vsldoi8 <1,2,u,0>, <2,2,2,2>
+ 2692245158U, // <u,0,2,3>: Cost 3 vsldoi8 <1,2,u,0>, <2,3,0,1>
+ 2263916882U, // <u,0,2,4>: Cost 3 vmrghw <u,2,3,0>, <0,4,1,5>
+ 2299709908U, // <u,0,2,5>: Cost 3 vmrglw <3,0,1,2>, <3,4,0,5>
+ 2692245434U, // <u,0,2,6>: Cost 3 vsldoi8 <1,2,u,0>, <2,6,3,7>
+ 2701535281U, // <u,0,2,7>: Cost 3 vsldoi8 <2,7,u,0>, <2,7,u,0>
+ 1190175389U, // <u,0,2,u>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 1209237504U, // <u,0,3,0>: Cost 2 vmrglw LHS, <0,0,0,0>
+ 1209239206U, // <u,0,3,1>: Cost 2 vmrglw LHS, <2,3,0,1>
+ 2704189813U, // <u,0,3,2>: Cost 3 vsldoi8 <3,2,u,0>, <3,2,u,0>
+ 2692245916U, // <u,0,3,3>: Cost 3 vsldoi8 <1,2,u,0>, <3,3,3,3>
+ 2282981033U, // <u,0,3,4>: Cost 3 vmrglw LHS, <2,3,0,4>
+ 2664386658U, // <u,0,3,5>: Cost 3 vsldoi4 <7,u,0,3>, <5,6,7,0>
+ 2691877496U, // <u,0,3,6>: Cost 3 vsldoi8 <1,2,3,0>, <3,6,0,7>
+ 2664388218U, // <u,0,3,7>: Cost 3 vsldoi4 <7,u,0,3>, <7,u,0,3>
+ 1209239213U, // <u,0,3,u>: Cost 2 vmrglw LHS, <2,3,0,u>
+ 2289623040U, // <u,0,4,0>: Cost 3 vmrglw <1,2,u,4>, <0,0,0,0>
+ 1678598482U, // <u,0,4,1>: Cost 2 vsldoi12 LHS, <0,4,1,5>
+ 2634532926U, // <u,0,4,2>: Cost 3 vsldoi4 <2,u,0,4>, <2,u,0,4>
+ 2235580672U, // <u,0,4,3>: Cost 3 vmrghw <3,4,5,6>, <0,3,1,4>
+ 1143619922U, // <u,0,4,4>: Cost 2 vmrghw <0,4,1,5>, <0,4,1,5>
+ 1618505014U, // <u,0,4,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 2658423714U, // <u,0,4,6>: Cost 3 vsldoi4 <6,u,0,4>, <6,u,0,4>
+ 2713259464U, // <u,0,4,7>: Cost 3 vsldoi8 <4,7,5,0>, <4,7,5,0>
+ 1683243409U, // <u,0,4,u>: Cost 2 vsldoi12 LHS, <0,4,u,5>
+ 1192443904U, // <u,0,5,0>: Cost 2 vmrghw RHS, <0,0,0,0>
+ 118702182U, // <u,0,5,1>: Cost 1 vmrghw RHS, LHS
+ 2266185901U, // <u,0,5,2>: Cost 3 vmrghw RHS, <0,2,1,2>
+ 2640513816U, // <u,0,5,3>: Cost 3 vsldoi4 <3,u,0,5>, <3,u,0,5>
+ 1192444242U, // <u,0,5,4>: Cost 2 vmrghw RHS, <0,4,1,5>
+ 2718789636U, // <u,0,5,5>: Cost 3 vsldoi8 <5,6,u,0>, <5,5,5,5>
+ 1645047915U, // <u,0,5,6>: Cost 2 vsldoi8 <5,6,u,0>, <5,6,u,0>
+ 2664404604U, // <u,0,5,7>: Cost 3 vsldoi4 <7,u,0,5>, <7,u,0,5>
+ 118702749U, // <u,0,5,u>: Cost 1 vmrghw RHS, LHS
+ 2302910464U, // <u,0,6,0>: Cost 3 vmrglw <3,4,u,6>, <0,0,0,0>
+ 1192886374U, // <u,0,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 2718790138U, // <u,0,6,2>: Cost 3 vsldoi8 <5,6,u,0>, <6,2,7,3>
+ 2722771537U, // <u,0,6,3>: Cost 3 vsldoi8 <6,3,u,0>, <6,3,u,0>
+ 2266628434U, // <u,0,6,4>: Cost 3 vmrghw <u,6,3,7>, <0,4,1,5>
+ 2248950180U, // <u,0,6,5>: Cost 3 vmrghw <5,6,7,0>, <0,5,1,6>
+ 2718790456U, // <u,0,6,6>: Cost 3 vsldoi8 <5,6,u,0>, <6,6,6,6>
+ 2718790478U, // <u,0,6,7>: Cost 3 vsldoi8 <5,6,u,0>, <6,7,0,1>
+ 1192886941U, // <u,0,6,u>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1235812352U, // <u,0,7,0>: Cost 2 vmrglw RHS, <0,0,0,0>
+ 1235814054U, // <u,0,7,1>: Cost 2 vmrglw RHS, <2,3,0,1>
+ 2728080601U, // <u,0,7,2>: Cost 3 vsldoi8 <7,2,u,0>, <7,2,u,0>
+ 2640530202U, // <u,0,7,3>: Cost 3 vsldoi4 <3,u,0,7>, <3,u,0,7>
+ 2640530742U, // <u,0,7,4>: Cost 3 vsldoi4 <3,u,0,7>, RHS
+ 2309556692U, // <u,0,7,5>: Cost 3 vmrglw RHS, <3,4,0,5>
+ 2730735133U, // <u,0,7,6>: Cost 3 vsldoi8 <7,6,u,0>, <7,6,u,0>
+ 2309556856U, // <u,0,7,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 1235814061U, // <u,0,7,u>: Cost 2 vmrglw RHS, <2,3,0,u>
+ 202162278U, // <u,0,u,0>: Cost 1 vspltisw0 LHS
+ 120365158U, // <u,0,u,1>: Cost 1 vmrghw LHS, LHS
+ 604856989U, // <u,0,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 2692249532U, // <u,0,u,3>: Cost 3 vsldoi8 <1,2,u,0>, <u,3,0,1>
+ 1560825142U, // <u,0,u,4>: Cost 2 vsldoi4 <2,u,0,u>, RHS
+ 1618507930U, // <u,0,u,5>: Cost 2 vsldoi8 <1,2,u,0>, RHS
+ 1584714662U, // <u,0,u,6>: Cost 2 vsldoi4 <6,u,0,u>, <6,u,0,u>
+ 2309565048U, // <u,0,u,7>: Cost 3 vmrglw RHS, <3,6,0,7>
+ 604857043U, // <u,0,u,u>: Cost 1 vsldoi12 LHS, LHS
+ 1611210825U, // <u,1,0,0>: Cost 2 vsldoi8 <0,0,u,1>, <0,0,u,1>
+ 1616519270U, // <u,1,0,1>: Cost 2 vsldoi8 <0,u,u,1>, LHS
+ 2287605459U, // <u,1,0,2>: Cost 3 vmrglw <0,u,u,0>, <u,0,1,2>
+ 2640546588U, // <u,1,0,3>: Cost 3 vsldoi4 <3,u,1,0>, <3,u,1,0>
+ 2622631222U, // <u,1,0,4>: Cost 3 vsldoi4 <0,u,1,0>, RHS
+ 2289590610U, // <u,1,0,5>: Cost 3 vmrglw <1,2,u,0>, <0,4,1,5>
+ 2664436630U, // <u,1,0,6>: Cost 3 vsldoi4 <7,u,1,0>, <6,7,u,1>
+ 2664437376U, // <u,1,0,7>: Cost 3 vsldoi4 <7,u,1,0>, <7,u,1,0>
+ 1616519889U, // <u,1,0,u>: Cost 2 vsldoi8 <0,u,u,1>, <0,u,u,1>
+ 1548894866U, // <u,1,1,0>: Cost 2 vsldoi4 <0,u,1,1>, <0,u,1,1>
+ 269271142U, // <u,1,1,1>: Cost 1 vspltisw1 LHS
+ 1189462934U, // <u,1,1,2>: Cost 2 vmrghw LHS, <1,2,3,0>
+ 2622638230U, // <u,1,1,3>: Cost 3 vsldoi4 <0,u,1,1>, <3,0,1,2>
+ 1548897590U, // <u,1,1,4>: Cost 2 vsldoi4 <0,u,1,1>, RHS
+ 2756985692U, // <u,1,1,5>: Cost 3 vsldoi12 LHS, <1,1,5,5>
+ 2658472872U, // <u,1,1,6>: Cost 3 vsldoi4 <6,u,1,1>, <6,u,1,1>
+ 2287614142U, // <u,1,1,7>: Cost 3 vmrglw <0,u,u,1>, <u,6,1,7>
+ 269271142U, // <u,1,1,u>: Cost 1 vspltisw1 LHS
+ 1566818406U, // <u,1,2,0>: Cost 2 vsldoi4 <3,u,1,2>, LHS
+ 2756985735U, // <u,1,2,1>: Cost 3 vsldoi12 LHS, <1,2,1,3>
+ 1148371862U, // <u,1,2,2>: Cost 2 vmrghw <1,2,3,0>, <1,2,3,0>
+ 835584U, // <u,1,2,3>: Cost 0 copy LHS
+ 1566821686U, // <u,1,2,4>: Cost 2 vsldoi4 <3,u,1,2>, RHS
+ 2756985771U, // <u,1,2,5>: Cost 3 vsldoi12 LHS, <1,2,5,3>
+ 2690262970U, // <u,1,2,6>: Cost 3 vsldoi8 <0,u,u,1>, <2,6,3,7>
+ 1590711938U, // <u,1,2,7>: Cost 2 vsldoi4 <7,u,1,2>, <7,u,1,2>
+ 835584U, // <u,1,2,u>: Cost 0 copy LHS
+ 2282979337U, // <u,1,3,0>: Cost 3 vmrglw LHS, <0,0,1,0>
+ 1209237514U, // <u,1,3,1>: Cost 2 vmrglw LHS, <0,0,1,1>
+ 1209239702U, // <u,1,3,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 2282979502U, // <u,1,3,3>: Cost 3 vmrglw LHS, <0,2,1,3>
+ 2282979341U, // <u,1,3,4>: Cost 3 vmrglw LHS, <0,0,1,4>
+ 1209237842U, // <u,1,3,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2282979505U, // <u,1,3,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 2287625423U, // <u,1,3,7>: Cost 3 vmrglw LHS, <1,6,1,7>
+ 1209237521U, // <u,1,3,u>: Cost 2 vmrglw LHS, <0,0,1,u>
+ 1635101613U, // <u,1,4,0>: Cost 2 vsldoi8 <4,0,u,1>, <4,0,u,1>
+ 2289623050U, // <u,1,4,1>: Cost 3 vmrglw <1,2,u,4>, <0,0,1,1>
+ 2289625238U, // <u,1,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,1,2>
+ 2640579360U, // <u,1,4,3>: Cost 3 vsldoi4 <3,u,1,4>, <3,u,1,4>
+ 2622663990U, // <u,1,4,4>: Cost 3 vsldoi4 <0,u,1,4>, RHS
+ 1616522550U, // <u,1,4,5>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 2664469398U, // <u,1,4,6>: Cost 3 vsldoi4 <7,u,1,4>, <6,7,u,1>
+ 2664470148U, // <u,1,4,7>: Cost 3 vsldoi4 <7,u,1,4>, <7,u,1,4>
+ 1616522793U, // <u,1,4,u>: Cost 2 vsldoi8 <0,u,u,1>, RHS
+ 1548927638U, // <u,1,5,0>: Cost 2 vsldoi4 <0,u,1,5>, <0,u,1,5>
+ 1192444724U, // <u,1,5,1>: Cost 2 vmrghw RHS, <1,1,1,1>
+ 1192444822U, // <u,1,5,2>: Cost 2 vmrghw RHS, <1,2,3,0>
+ 2622670998U, // <u,1,5,3>: Cost 3 vsldoi4 <0,u,1,5>, <3,0,1,2>
+ 1548930358U, // <u,1,5,4>: Cost 2 vsldoi4 <0,u,1,5>, RHS
+ 1210728786U, // <u,1,5,5>: Cost 2 vmrglw <0,4,1,5>, <0,4,1,5>
+ 2714153058U, // <u,1,5,6>: Cost 3 vsldoi8 <4,u,u,1>, <5,6,7,0>
+ 2670449658U, // <u,1,5,7>: Cost 3 vsldoi4 <u,u,1,5>, <7,0,1,2>
+ 1548932910U, // <u,1,5,u>: Cost 2 vsldoi4 <0,u,1,5>, LHS
+ 2622677655U, // <u,1,6,0>: Cost 3 vsldoi4 <0,u,1,6>, <0,u,1,6>
+ 2756986063U, // <u,1,6,1>: Cost 3 vsldoi12 LHS, <1,6,1,7>
+ 2302912662U, // <u,1,6,2>: Cost 3 vmrglw <3,4,u,6>, <3,0,1,2>
+ 3696421014U, // <u,1,6,3>: Cost 4 vsldoi4 <0,u,1,6>, <3,0,1,2>
+ 2622680374U, // <u,1,6,4>: Cost 3 vsldoi4 <0,u,1,6>, RHS
+ 2756986099U, // <u,1,6,5>: Cost 3 vsldoi12 LHS, <1,6,5,7>
+ 2714153784U, // <u,1,6,6>: Cost 3 vsldoi8 <4,u,u,1>, <6,6,6,6>
+ 1651692438U, // <u,1,6,7>: Cost 2 vsldoi8 <6,7,u,1>, <6,7,u,1>
+ 1652356071U, // <u,1,6,u>: Cost 2 vsldoi8 <6,u,u,1>, <6,u,u,1>
+ 2628657254U, // <u,1,7,0>: Cost 3 vsldoi4 <1,u,1,7>, LHS
+ 1235812362U, // <u,1,7,1>: Cost 2 vmrglw RHS, <0,0,1,1>
+ 1235814550U, // <u,1,7,2>: Cost 2 vmrglw RHS, <3,0,1,2>
+ 2309554350U, // <u,1,7,3>: Cost 3 vmrglw RHS, <0,2,1,3>
+ 2628660534U, // <u,1,7,4>: Cost 3 vsldoi4 <1,u,1,7>, RHS
+ 1235812690U, // <u,1,7,5>: Cost 2 vmrglw RHS, <0,4,1,5>
+ 2309554353U, // <u,1,7,6>: Cost 3 vmrglw RHS, <0,2,1,6>
+ 2309554678U, // <u,1,7,7>: Cost 3 vmrglw RHS, <0,6,1,7>
+ 1235812369U, // <u,1,7,u>: Cost 2 vmrglw RHS, <0,0,1,u>
+ 1548952217U, // <u,1,u,0>: Cost 2 vsldoi4 <0,u,1,u>, <0,u,1,u>
+ 269271142U, // <u,1,u,1>: Cost 1 vspltisw1 LHS
+ 1209280662U, // <u,1,u,2>: Cost 2 vmrglw LHS, <3,0,1,2>
+ 835584U, // <u,1,u,3>: Cost 0 copy LHS
+ 1548954934U, // <u,1,u,4>: Cost 2 vsldoi4 <0,u,1,u>, RHS
+ 1209278802U, // <u,1,u,5>: Cost 2 vmrglw LHS, <0,4,1,5>
+ 2283020465U, // <u,1,u,6>: Cost 3 vmrglw LHS, <0,2,1,6>
+ 1590761096U, // <u,1,u,7>: Cost 2 vsldoi4 <7,u,1,u>, <7,u,1,u>
+ 835584U, // <u,1,u,u>: Cost 0 copy LHS
+ 2702876672U, // <u,2,0,0>: Cost 3 vsldoi8 <3,0,u,2>, <0,0,0,0>
+ 1629134950U, // <u,2,0,1>: Cost 2 vsldoi8 <3,0,u,2>, LHS
+ 2289591912U, // <u,2,0,2>: Cost 3 vmrglw <1,2,u,0>, <2,2,2,2>
+ 1215848550U, // <u,2,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2702877010U, // <u,2,0,4>: Cost 3 vsldoi8 <3,0,u,2>, <0,4,1,5>
+ 2289222708U, // <u,2,0,5>: Cost 3 vmrglw <1,2,3,0>, <1,4,2,5>
+ 2779178473U, // <u,2,0,6>: Cost 3 vsldoi12 RHS, <2,0,6,1>
+ 2726249024U, // <u,2,0,7>: Cost 3 vsldoi8 <7,0,1,2>, <0,7,1,0>
+ 1215848555U, // <u,2,0,u>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 2690933539U, // <u,2,1,0>: Cost 3 vsldoi8 <1,0,u,2>, <1,0,u,2>
+ 2628683124U, // <u,2,1,1>: Cost 3 vsldoi4 <1,u,2,1>, <1,u,2,1>
+ 1189463656U, // <u,2,1,2>: Cost 2 vmrghw LHS, <2,2,2,2>
+ 1213866086U, // <u,2,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 2628685110U, // <u,2,1,4>: Cost 3 vsldoi4 <1,u,2,1>, RHS
+ 2263205736U, // <u,2,1,5>: Cost 3 vmrghw LHS, <2,5,3,6>
+ 1189463994U, // <u,2,1,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 2263205866U, // <u,2,1,7>: Cost 3 vmrghw LHS, <2,7,0,1>
+ 1213866091U, // <u,2,1,u>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1556938854U, // <u,2,2,0>: Cost 2 vsldoi4 <2,2,2,2>, LHS
+ 2697569869U, // <u,2,2,1>: Cost 3 vsldoi8 <2,1,u,2>, <2,1,u,2>
+ 336380006U, // <u,2,2,2>: Cost 1 vspltisw2 LHS
+ 1678599794U, // <u,2,2,3>: Cost 2 vsldoi12 LHS, <2,2,3,3>
+ 1556942134U, // <u,2,2,4>: Cost 2 vsldoi4 <2,2,2,2>, RHS
+ 2295138061U, // <u,2,2,5>: Cost 3 vmrglw <2,2,2,2>, <2,4,2,5>
+ 2702878650U, // <u,2,2,6>: Cost 3 vsldoi8 <3,0,u,2>, <2,6,3,7>
+ 2300229831U, // <u,2,2,7>: Cost 3 vmrglw <3,0,u,2>, <u,6,2,7>
+ 336380006U, // <u,2,2,u>: Cost 1 vspltisw2 LHS
+ 475243165U, // <u,2,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1548985140U, // <u,2,3,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 1209239144U, // <u,2,3,2>: Cost 2 vmrglw LHS, <2,2,2,2>
+ 135495782U, // <u,2,3,3>: Cost 1 vmrglw LHS, LHS
+ 475245878U, // <u,2,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1596764164U, // <u,2,3,5>: Cost 2 vsldoi4 LHS, <5,5,5,5>
+ 1596764666U, // <u,2,3,6>: Cost 2 vsldoi4 LHS, <6,2,7,3>
+ 1596765178U, // <u,2,3,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135495787U, // <u,2,3,u>: Cost 1 vmrglw LHS, LHS
+ 2708851630U, // <u,2,4,0>: Cost 3 vsldoi8 <4,0,u,2>, <4,0,u,2>
+ 2217362979U, // <u,2,4,1>: Cost 3 vmrghw <0,4,1,5>, <2,1,3,5>
+ 2289624680U, // <u,2,4,2>: Cost 3 vmrglw <1,2,u,4>, <2,2,2,2>
+ 1215881318U, // <u,2,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2726767824U, // <u,2,4,4>: Cost 3 vsldoi8 <7,0,u,2>, <4,4,4,4>
+ 1629138230U, // <u,2,4,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 2779178801U, // <u,2,4,6>: Cost 3 vsldoi12 RHS, <2,4,6,5>
+ 2726251976U, // <u,2,4,7>: Cost 3 vsldoi8 <7,0,1,2>, <4,7,5,0>
+ 1215881323U, // <u,2,4,u>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 2628714598U, // <u,2,5,0>: Cost 3 vsldoi4 <1,u,2,5>, LHS
+ 2628715896U, // <u,2,5,1>: Cost 3 vsldoi4 <1,u,2,5>, <1,u,2,5>
+ 1192445544U, // <u,2,5,2>: Cost 2 vmrghw RHS, <2,2,2,2>
+ 1213898854U, // <u,2,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2628717878U, // <u,2,5,4>: Cost 3 vsldoi4 <1,u,2,5>, RHS
+ 2726768644U, // <u,2,5,5>: Cost 3 vsldoi8 <7,0,u,2>, <5,5,5,5>
+ 1192445882U, // <u,2,5,6>: Cost 2 vmrghw RHS, <2,6,3,7>
+ 2266187754U, // <u,2,5,7>: Cost 3 vmrghw RHS, <2,7,0,1>
+ 1213898859U, // <u,2,5,u>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 2634694758U, // <u,2,6,0>: Cost 3 vsldoi4 <2,u,2,6>, LHS
+ 2721460657U, // <u,2,6,1>: Cost 3 vsldoi8 <6,1,u,2>, <6,1,u,2>
+ 2296940136U, // <u,2,6,2>: Cost 3 vmrglw <2,4,u,6>, <2,2,2,2>
+ 1678600122U, // <u,2,6,3>: Cost 2 vsldoi12 LHS, <2,6,3,7>
+ 2634698038U, // <u,2,6,4>: Cost 3 vsldoi4 <2,u,2,6>, RHS
+ 3370682125U, // <u,2,6,5>: Cost 4 vmrglw <2,4,u,6>, <2,4,2,5>
+ 1157056442U, // <u,2,6,6>: Cost 2 vmrghw <2,6,3,7>, <2,6,3,7>
+ 2725442455U, // <u,2,6,7>: Cost 3 vsldoi8 <6,7,u,2>, <6,7,u,2>
+ 1678600167U, // <u,2,6,u>: Cost 2 vsldoi12 LHS, <2,6,u,7>
+ 1653027897U, // <u,2,7,0>: Cost 2 vsldoi8 <7,0,u,2>, <7,0,u,2>
+ 2309554924U, // <u,2,7,1>: Cost 3 vmrglw RHS, <1,0,2,1>
+ 1235813992U, // <u,2,7,2>: Cost 2 vmrglw RHS, <2,2,2,2>
+ 162070630U, // <u,2,7,3>: Cost 1 vmrglw RHS, LHS
+ 2634706230U, // <u,2,7,4>: Cost 3 vsldoi4 <2,u,2,7>, RHS
+ 2309555252U, // <u,2,7,5>: Cost 3 vmrglw RHS, <1,4,2,5>
+ 2309555901U, // <u,2,7,6>: Cost 3 vmrglw RHS, <2,3,2,6>
+ 2309555416U, // <u,2,7,7>: Cost 3 vmrglw RHS, <1,6,2,7>
+ 162070635U, // <u,2,7,u>: Cost 1 vmrglw RHS, LHS
+ 475284130U, // <u,2,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 1549026100U, // <u,2,u,1>: Cost 2 vsldoi4 LHS, <1,1,1,1>
+ 336380006U, // <u,2,u,2>: Cost 1 vspltisw2 LHS
+ 135536742U, // <u,2,u,3>: Cost 1 vmrglw LHS, LHS
+ 475286838U, // <u,2,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 1629141146U, // <u,2,u,5>: Cost 2 vsldoi8 <3,0,u,2>, RHS
+ 1194108858U, // <u,2,u,6>: Cost 2 vmrghw LHS, <2,6,3,7>
+ 1596806138U, // <u,2,u,7>: Cost 2 vsldoi4 LHS, <7,0,1,2>
+ 135536747U, // <u,2,u,u>: Cost 1 vmrglw LHS, LHS
+ 1611890688U, // <u,3,0,0>: Cost 2 vsldoi8 LHS, <0,0,0,0>
+ 538149020U, // <u,3,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685632685U, // <u,3,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 2685632764U, // <u,3,0,3>: Cost 3 vsldoi8 LHS, <0,3,1,0>
+ 1611891026U, // <u,3,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 2733408722U, // <u,3,0,5>: Cost 3 vsldoi8 LHS, <0,5,6,7>
+ 2658612153U, // <u,3,0,6>: Cost 3 vsldoi4 <6,u,3,0>, <6,u,3,0>
+ 2289592250U, // <u,3,0,7>: Cost 3 vmrglw <1,2,u,0>, <2,6,3,7>
+ 538149533U, // <u,3,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1189464214U, // <u,3,1,0>: Cost 2 vmrghw LHS, <3,0,1,2>
+ 1611891508U, // <u,3,1,1>: Cost 2 vsldoi8 LHS, <1,1,1,1>
+ 1611891606U, // <u,3,1,2>: Cost 2 vsldoi8 LHS, <1,2,3,0>
+ 1189464476U, // <u,3,1,3>: Cost 2 vmrghw LHS, <3,3,3,3>
+ 1189464578U, // <u,3,1,4>: Cost 2 vmrghw LHS, <3,4,5,6>
+ 2690278511U, // <u,3,1,5>: Cost 3 vsldoi8 LHS, <1,5,0,1>
+ 2690278607U, // <u,3,1,6>: Cost 3 vsldoi8 LHS, <1,6,1,7>
+ 2287609786U, // <u,3,1,7>: Cost 3 vmrglw <0,u,u,1>, <2,6,3,7>
+ 1611892092U, // <u,3,1,u>: Cost 2 vsldoi8 LHS, <1,u,3,0>
+ 2685634042U, // <u,3,2,0>: Cost 3 vsldoi8 LHS, <2,0,u,0>
+ 2685634079U, // <u,3,2,1>: Cost 3 vsldoi8 LHS, <2,1,3,1>
+ 1611892328U, // <u,3,2,2>: Cost 2 vsldoi8 LHS, <2,2,2,2>
+ 1611892390U, // <u,3,2,3>: Cost 2 vsldoi8 LHS, <2,3,0,1>
+ 2685634371U, // <u,3,2,4>: Cost 3 vsldoi8 LHS, <2,4,u,5>
+ 2685634453U, // <u,3,2,5>: Cost 3 vsldoi8 LHS, <2,5,u,6>
+ 1611892666U, // <u,3,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 2300225466U, // <u,3,2,7>: Cost 3 vmrglw <3,0,u,2>, <2,6,3,7>
+ 1611892795U, // <u,3,2,u>: Cost 2 vsldoi8 LHS, <2,u,0,1>
+ 1209238422U, // <u,3,3,0>: Cost 2 vmrglw LHS, <1,2,3,0>
+ 2282980247U, // <u,3,3,1>: Cost 3 vmrglw LHS, <1,2,3,1>
+ 1561004120U, // <u,3,3,2>: Cost 2 vsldoi4 <2,u,3,3>, <2,u,3,3>
+ 403488870U, // <u,3,3,3>: Cost 1 vspltisw3 LHS
+ 1209238426U, // <u,3,3,4>: Cost 2 vmrglw LHS, <1,2,3,4>
+ 2282980899U, // <u,3,3,5>: Cost 3 vmrglw LHS, <2,1,3,5>
+ 2282985598U, // <u,3,3,6>: Cost 3 vmrglw LHS, <u,5,3,6>
+ 1209239482U, // <u,3,3,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 403488870U, // <u,3,3,u>: Cost 1 vspltisw3 LHS
+ 1555038310U, // <u,3,4,0>: Cost 2 vsldoi4 <1,u,3,4>, LHS
+ 1555039616U, // <u,3,4,1>: Cost 2 vsldoi4 <1,u,3,4>, <1,u,3,4>
+ 2628781672U, // <u,3,4,2>: Cost 3 vsldoi4 <1,u,3,4>, <2,2,2,2>
+ 2289624690U, // <u,3,4,3>: Cost 3 vmrglw <1,2,u,4>, <2,2,3,3>
+ 1555041590U, // <u,3,4,4>: Cost 2 vsldoi4 <1,u,3,4>, RHS
+ 538152246U, // <u,3,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2658644925U, // <u,3,4,6>: Cost 3 vsldoi4 <6,u,3,4>, <6,u,3,4>
+ 2289625018U, // <u,3,4,7>: Cost 3 vmrglw <1,2,u,4>, <2,6,3,7>
+ 538152489U, // <u,3,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1192446102U, // <u,3,5,0>: Cost 2 vmrghw RHS, <3,0,1,2>
+ 2733411983U, // <u,3,5,1>: Cost 3 vsldoi8 LHS, <5,1,0,1>
+ 2634762330U, // <u,3,5,2>: Cost 3 vsldoi4 <2,u,3,5>, <2,u,3,5>
+ 1192446364U, // <u,3,5,3>: Cost 2 vmrghw RHS, <3,3,3,3>
+ 1192446466U, // <u,3,5,4>: Cost 2 vmrghw RHS, <3,4,5,6>
+ 1659670532U, // <u,3,5,5>: Cost 2 vsldoi8 LHS, <5,5,5,5>
+ 1659670626U, // <u,3,5,6>: Cost 2 vsldoi8 LHS, <5,6,7,0>
+ 2287642554U, // <u,3,5,7>: Cost 3 vmrglw <0,u,u,5>, <2,6,3,7>
+ 1659670788U, // <u,3,5,u>: Cost 2 vsldoi8 LHS, <5,u,7,0>
+ 2634768486U, // <u,3,6,0>: Cost 3 vsldoi4 <2,u,3,6>, LHS
+ 2733412775U, // <u,3,6,1>: Cost 3 vsldoi8 LHS, <6,1,7,1>
+ 1648390659U, // <u,3,6,2>: Cost 2 vsldoi8 <6,2,u,3>, <6,2,u,3>
+ 2634770973U, // <u,3,6,3>: Cost 3 vsldoi4 <2,u,3,6>, <3,4,u,6>
+ 2634771766U, // <u,3,6,4>: Cost 3 vsldoi4 <2,u,3,6>, RHS
+ 2733413099U, // <u,3,6,5>: Cost 3 vsldoi8 LHS, <6,5,7,1>
+ 1659671352U, // <u,3,6,6>: Cost 2 vsldoi8 LHS, <6,6,6,6>
+ 1659671374U, // <u,3,6,7>: Cost 2 vsldoi8 LHS, <6,7,0,1>
+ 1652372457U, // <u,3,6,u>: Cost 2 vsldoi8 <6,u,u,3>, <6,u,u,3>
+ 1561034854U, // <u,3,7,0>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 2634777396U, // <u,3,7,1>: Cost 3 vsldoi4 <2,u,3,7>, <1,1,1,1>
+ 1561036892U, // <u,3,7,2>: Cost 2 vsldoi4 <2,u,3,7>, <2,u,3,7>
+ 1235814002U, // <u,3,7,3>: Cost 2 vmrglw RHS, <2,2,3,3>
+ 1561038134U, // <u,3,7,4>: Cost 2 vsldoi4 <2,u,3,7>, RHS
+ 2309555747U, // <u,3,7,5>: Cost 3 vmrglw RHS, <2,1,3,5>
+ 2309556072U, // <u,3,7,6>: Cost 3 vmrglw RHS, <2,5,3,6>
+ 1235814330U, // <u,3,7,7>: Cost 2 vmrglw RHS, <2,6,3,7>
+ 1561040686U, // <u,3,7,u>: Cost 2 vsldoi4 <2,u,3,7>, LHS
+ 1611896531U, // <u,3,u,0>: Cost 2 vsldoi8 LHS, <u,0,1,2>
+ 538154798U, // <u,3,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 1611896712U, // <u,3,u,2>: Cost 2 vsldoi8 LHS, <u,2,3,3>
+ 403488870U, // <u,3,u,3>: Cost 1 vspltisw3 LHS
+ 1611896895U, // <u,3,u,4>: Cost 2 vsldoi8 LHS, <u,4,5,6>
+ 538155162U, // <u,3,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 1611897040U, // <u,3,u,6>: Cost 2 vsldoi8 LHS, <u,6,3,7>
+ 1209280442U, // <u,3,u,7>: Cost 2 vmrglw LHS, <2,6,3,7>
+ 538155365U, // <u,3,u,u>: Cost 1 vsldoi8 LHS, LHS
+ 1165118354U, // <u,4,0,0>: Cost 2 vmrghw <4,0,5,1>, <4,0,5,1>
+ 1618534502U, // <u,4,0,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 2634795102U, // <u,4,0,2>: Cost 3 vsldoi4 <2,u,4,0>, <2,u,4,0>
+ 2686451968U, // <u,4,0,3>: Cost 3 vsldoi8 <0,3,1,4>, <0,3,1,4>
+ 2692276562U, // <u,4,0,4>: Cost 3 vsldoi8 <1,2,u,4>, <0,4,1,5>
+ 1705438098U, // <u,4,0,5>: Cost 2 vsldoi12 RHS, <4,0,5,1>
+ 2658685890U, // <u,4,0,6>: Cost 3 vsldoi4 <6,u,4,0>, <6,u,4,0>
+ 2256489928U, // <u,4,0,7>: Cost 3 vmrghw <7,0,1,2>, <4,7,5,0>
+ 1618535069U, // <u,4,0,u>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1189464978U, // <u,4,1,0>: Cost 2 vmrghw LHS, <4,0,5,1>
+ 2692277044U, // <u,4,1,1>: Cost 3 vsldoi8 <1,2,u,4>, <1,1,1,1>
+ 1618535367U, // <u,4,1,2>: Cost 2 vsldoi8 <1,2,u,4>, <1,2,u,4>
+ 2640775992U, // <u,4,1,3>: Cost 3 vsldoi4 <3,u,4,1>, <3,u,4,1>
+ 1189465296U, // <u,4,1,4>: Cost 2 vmrghw LHS, <4,4,4,4>
+ 115723574U, // <u,4,1,5>: Cost 1 vmrghw LHS, RHS
+ 2263207289U, // <u,4,1,6>: Cost 3 vmrghw LHS, <4,6,5,2>
+ 2664666780U, // <u,4,1,7>: Cost 3 vsldoi4 <7,u,4,1>, <7,u,4,1>
+ 115723817U, // <u,4,1,u>: Cost 1 vmrghw LHS, RHS
+ 2263919506U, // <u,4,2,0>: Cost 3 vmrghw <u,2,3,0>, <4,0,5,1>
+ 2222115812U, // <u,4,2,1>: Cost 3 vmrghw <1,2,3,0>, <4,1,5,2>
+ 2692277864U, // <u,4,2,2>: Cost 3 vsldoi8 <1,2,u,4>, <2,2,2,2>
+ 2692277926U, // <u,4,2,3>: Cost 3 vsldoi8 <1,2,u,4>, <2,3,0,1>
+ 2324114640U, // <u,4,2,4>: Cost 3 vmrglw <7,0,u,2>, <4,4,4,4>
+ 1190178102U, // <u,4,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278202U, // <u,4,2,6>: Cost 3 vsldoi8 <1,2,u,4>, <2,6,3,7>
+ 2701568053U, // <u,4,2,7>: Cost 3 vsldoi8 <2,7,u,4>, <2,7,u,4>
+ 1190178345U, // <u,4,2,u>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 2692278422U, // <u,4,3,0>: Cost 3 vsldoi8 <1,2,u,4>, <3,0,1,2>
+ 2282981552U, // <u,4,3,1>: Cost 3 vmrglw LHS, <3,0,4,1>
+ 2704222585U, // <u,4,3,2>: Cost 3 vsldoi8 <3,2,u,4>, <3,2,u,4>
+ 2692278684U, // <u,4,3,3>: Cost 3 vsldoi8 <1,2,u,4>, <3,3,3,3>
+ 1257016528U, // <u,4,3,4>: Cost 2 vmrglw LHS, <4,4,4,4>
+ 1209239246U, // <u,4,3,5>: Cost 2 vmrglw LHS, <2,3,4,5>
+ 2691910300U, // <u,4,3,6>: Cost 3 vsldoi8 <1,2,3,4>, <3,6,4,7>
+ 2664683166U, // <u,4,3,7>: Cost 3 vsldoi4 <7,u,4,3>, <7,u,4,3>
+ 1209239249U, // <u,4,3,u>: Cost 2 vmrglw LHS, <2,3,4,u>
+ 1573027942U, // <u,4,4,0>: Cost 2 vsldoi4 <4,u,4,4>, LHS
+ 2634826695U, // <u,4,4,1>: Cost 3 vsldoi4 <2,u,4,4>, <1,2,u,4>
+ 2634827874U, // <u,4,4,2>: Cost 3 vsldoi4 <2,u,4,4>, <2,u,4,4>
+ 2289629073U, // <u,4,4,3>: Cost 3 vmrglw <1,2,u,4>, <u,2,4,3>
+ 229035318U, // <u,4,4,4>: Cost 1 vspltisw0 RHS
+ 1618537782U, // <u,4,4,5>: Cost 2 vsldoi8 <1,2,u,4>, RHS
+ 2658718662U, // <u,4,4,6>: Cost 3 vsldoi4 <6,u,4,4>, <6,u,4,4>
+ 2289629401U, // <u,4,4,7>: Cost 3 vmrglw <1,2,u,4>, <u,6,4,7>
+ 229035318U, // <u,4,4,u>: Cost 1 vspltisw0 RHS
+ 1561092198U, // <u,4,5,0>: Cost 2 vsldoi4 <2,u,4,5>, LHS
+ 2628863370U, // <u,4,5,1>: Cost 3 vsldoi4 <1,u,4,5>, <1,u,4,5>
+ 1561094243U, // <u,4,5,2>: Cost 2 vsldoi4 <2,u,4,5>, <2,u,4,5>
+ 2634836118U, // <u,4,5,3>: Cost 3 vsldoi4 <2,u,4,5>, <3,0,1,2>
+ 1561095478U, // <u,4,5,4>: Cost 2 vsldoi4 <2,u,4,5>, RHS
+ 118705462U, // <u,4,5,5>: Cost 1 vmrghw RHS, RHS
+ 604859702U, // <u,4,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 2658726906U, // <u,4,5,7>: Cost 3 vsldoi4 <6,u,4,5>, <7,0,1,2>
+ 604859720U, // <u,4,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 2266631058U, // <u,4,6,0>: Cost 3 vmrghw <u,6,3,7>, <4,0,5,1>
+ 2302692152U, // <u,4,6,1>: Cost 3 vmrglw <3,4,5,6>, <3,u,4,1>
+ 2718822906U, // <u,4,6,2>: Cost 3 vsldoi8 <5,6,u,4>, <6,2,7,3>
+ 2722804309U, // <u,4,6,3>: Cost 3 vsldoi8 <6,3,u,4>, <6,3,u,4>
+ 2723467942U, // <u,4,6,4>: Cost 3 vsldoi8 <6,4,u,4>, <6,4,u,4>
+ 1192889654U, // <u,4,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2718823224U, // <u,4,6,6>: Cost 3 vsldoi8 <5,6,u,4>, <6,6,6,6>
+ 2718823246U, // <u,4,6,7>: Cost 3 vsldoi8 <5,6,u,4>, <6,7,0,1>
+ 1192889897U, // <u,4,6,u>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 2640822374U, // <u,4,7,0>: Cost 3 vsldoi4 <3,u,4,7>, LHS
+ 2640823194U, // <u,4,7,1>: Cost 3 vsldoi4 <3,u,4,7>, <1,2,3,4>
+ 2728113373U, // <u,4,7,2>: Cost 3 vsldoi8 <7,2,u,4>, <7,2,u,4>
+ 2640825150U, // <u,4,7,3>: Cost 3 vsldoi4 <3,u,4,7>, <3,u,4,7>
+ 1235815632U, // <u,4,7,4>: Cost 2 vmrglw RHS, <4,4,4,4>
+ 1235814094U, // <u,4,7,5>: Cost 2 vmrglw RHS, <2,3,4,5>
+ 2730767905U, // <u,4,7,6>: Cost 3 vsldoi8 <7,6,u,4>, <7,6,u,4>
+ 2309556892U, // <u,4,7,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 1235814097U, // <u,4,7,u>: Cost 2 vmrglw RHS, <2,3,4,u>
+ 1561116774U, // <u,4,u,0>: Cost 2 vsldoi4 <2,u,4,u>, LHS
+ 1618540334U, // <u,4,u,1>: Cost 2 vsldoi8 <1,2,u,4>, LHS
+ 1561118822U, // <u,4,u,2>: Cost 2 vsldoi4 <2,u,4,u>, <2,u,4,u>
+ 2692282300U, // <u,4,u,3>: Cost 3 vsldoi8 <1,2,u,4>, <u,3,0,1>
+ 229035318U, // <u,4,u,4>: Cost 1 vspltisw0 RHS
+ 120368438U, // <u,4,u,5>: Cost 1 vmrghw LHS, RHS
+ 604859945U, // <u,4,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 2309565084U, // <u,4,u,7>: Cost 3 vmrglw RHS, <3,6,4,7>
+ 604859963U, // <u,4,u,u>: Cost 1 vsldoi12 LHS, RHS
+ 2690293760U, // <u,5,0,0>: Cost 3 vsldoi8 <0,u,u,5>, <0,0,0,0>
+ 1616552038U, // <u,5,0,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2640840434U, // <u,5,0,2>: Cost 3 vsldoi4 <3,u,5,0>, <2,3,u,5>
+ 2640841536U, // <u,5,0,3>: Cost 3 vsldoi4 <3,u,5,0>, <3,u,5,0>
+ 1613381970U, // <u,5,0,4>: Cost 2 vsldoi8 <0,4,1,5>, <0,4,1,5>
+ 2316135642U, // <u,5,0,5>: Cost 3 vmrglw <5,6,u,0>, <4,4,5,5>
+ 2289592834U, // <u,5,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,5,6>
+ 2664732324U, // <u,5,0,7>: Cost 3 vsldoi4 <7,u,5,0>, <7,u,5,0>
+ 1616552661U, // <u,5,0,u>: Cost 2 vsldoi8 <0,u,u,5>, <0,u,u,5>
+ 1573077094U, // <u,5,1,0>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 1237536282U, // <u,5,1,1>: Cost 2 vmrglw <4,u,5,1>, <4,u,5,1>
+ 2690294678U, // <u,5,1,2>: Cost 3 vsldoi8 <0,u,u,5>, <1,2,3,0>
+ 2646821014U, // <u,5,1,3>: Cost 3 vsldoi4 <4,u,5,1>, <3,0,1,2>
+ 1573080602U, // <u,5,1,4>: Cost 2 vsldoi4 <4,u,5,1>, <4,u,5,1>
+ 1189466116U, // <u,5,1,5>: Cost 2 vmrghw LHS, <5,5,5,5>
+ 1189466210U, // <u,5,1,6>: Cost 2 vmrghw LHS, <5,6,7,0>
+ 2646823930U, // <u,5,1,7>: Cost 3 vsldoi4 <4,u,5,1>, <7,0,1,2>
+ 1573082926U, // <u,5,1,u>: Cost 2 vsldoi4 <4,u,5,1>, LHS
+ 2640855142U, // <u,5,2,0>: Cost 3 vsldoi4 <3,u,5,2>, LHS
+ 2697594448U, // <u,5,2,1>: Cost 3 vsldoi8 <2,1,u,5>, <2,1,u,5>
+ 2690295400U, // <u,5,2,2>: Cost 3 vsldoi8 <0,u,u,5>, <2,2,2,2>
+ 1625179890U, // <u,5,2,3>: Cost 2 vsldoi8 <2,3,u,5>, <2,3,u,5>
+ 2699585347U, // <u,5,2,4>: Cost 3 vsldoi8 <2,4,u,5>, <2,4,u,5>
+ 2781171471U, // <u,5,2,5>: Cost 3 vsldoi12 RHS, <5,2,5,3>
+ 2690295738U, // <u,5,2,6>: Cost 3 vsldoi8 <0,u,u,5>, <2,6,3,7>
+ 3775318070U, // <u,5,2,7>: Cost 4 vsldoi8 <2,7,u,5>, <2,7,u,5>
+ 1628498055U, // <u,5,2,u>: Cost 2 vsldoi8 <2,u,u,5>, <2,u,u,5>
+ 2287627234U, // <u,5,3,0>: Cost 3 vmrglw LHS, <4,1,5,0>
+ 1257016210U, // <u,5,3,1>: Cost 2 vmrglw LHS, <4,0,5,1>
+ 2646836942U, // <u,5,3,2>: Cost 3 vsldoi4 <4,u,5,3>, <2,3,4,5>
+ 2287625131U, // <u,5,3,3>: Cost 3 vmrglw LHS, <1,2,5,3>
+ 2287627238U, // <u,5,3,4>: Cost 3 vmrglw LHS, <4,1,5,4>
+ 1257016538U, // <u,5,3,5>: Cost 2 vmrglw LHS, <4,4,5,5>
+ 1209240066U, // <u,5,3,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 2287625459U, // <u,5,3,7>: Cost 3 vmrglw LHS, <1,6,5,7>
+ 1209240068U, // <u,5,3,u>: Cost 2 vmrglw LHS, <3,4,5,u>
+ 2640871526U, // <u,5,4,0>: Cost 3 vsldoi4 <3,u,5,4>, LHS
+ 2316168082U, // <u,5,4,1>: Cost 3 vmrglw <5,6,u,4>, <4,0,5,1>
+ 2640873202U, // <u,5,4,2>: Cost 3 vsldoi4 <3,u,5,4>, <2,3,u,5>
+ 2640874308U, // <u,5,4,3>: Cost 3 vsldoi4 <3,u,5,4>, <3,u,5,4>
+ 1637788917U, // <u,5,4,4>: Cost 2 vsldoi8 <4,4,u,5>, <4,4,u,5>
+ 1616555318U, // <u,5,4,5>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 2287638591U, // <u,5,4,6>: Cost 3 vmrglw <0,u,u,4>, <u,4,5,6>
+ 2664765096U, // <u,5,4,7>: Cost 3 vsldoi4 <7,u,5,4>, <7,u,5,4>
+ 1616555561U, // <u,5,4,u>: Cost 2 vsldoi8 <0,u,u,5>, RHS
+ 1573109862U, // <u,5,5,0>: Cost 2 vsldoi4 <4,u,5,5>, LHS
+ 2646852404U, // <u,5,5,1>: Cost 3 vsldoi4 <4,u,5,5>, <1,1,1,1>
+ 2646853224U, // <u,5,5,2>: Cost 3 vsldoi4 <4,u,5,5>, <2,2,2,2>
+ 2287646618U, // <u,5,5,3>: Cost 3 vmrglw <0,u,u,5>, <u,2,5,3>
+ 1573113374U, // <u,5,5,4>: Cost 2 vsldoi4 <4,u,5,5>, <4,u,5,5>
+ 296144182U, // <u,5,5,5>: Cost 1 vspltisw1 RHS
+ 1192448098U, // <u,5,5,6>: Cost 2 vmrghw RHS, <5,6,7,0>
+ 2287646946U, // <u,5,5,7>: Cost 3 vmrglw <0,u,u,5>, <u,6,5,7>
+ 296144182U, // <u,5,5,u>: Cost 1 vspltisw1 RHS
+ 1567146086U, // <u,5,6,0>: Cost 2 vsldoi4 <3,u,5,6>, LHS
+ 2628945300U, // <u,5,6,1>: Cost 3 vsldoi4 <1,u,5,6>, <1,u,5,6>
+ 2634917997U, // <u,5,6,2>: Cost 3 vsldoi4 <2,u,5,6>, <2,u,5,6>
+ 1567148870U, // <u,5,6,3>: Cost 2 vsldoi4 <3,u,5,6>, <3,u,5,6>
+ 1567149366U, // <u,5,6,4>: Cost 2 vsldoi4 <3,u,5,6>, RHS
+ 2781171799U, // <u,5,6,5>: Cost 3 vsldoi12 RHS, <5,6,5,7>
+ 1228950018U, // <u,5,6,6>: Cost 2 vmrglw <3,4,5,6>, <3,4,5,6>
+ 27705344U, // <u,5,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,6,u>: Cost 0 copy RHS
+ 2628952166U, // <u,5,7,0>: Cost 3 vsldoi4 <1,u,5,7>, LHS
+ 1235815314U, // <u,5,7,1>: Cost 2 vmrglw RHS, <4,0,5,1>
+ 2309556734U, // <u,5,7,2>: Cost 3 vmrglw RHS, <3,4,5,2>
+ 2309555115U, // <u,5,7,3>: Cost 3 vmrglw RHS, <1,2,5,3>
+ 2628955446U, // <u,5,7,4>: Cost 3 vsldoi4 <1,u,5,7>, RHS
+ 1235815642U, // <u,5,7,5>: Cost 2 vmrglw RHS, <4,4,5,5>
+ 1235814914U, // <u,5,7,6>: Cost 2 vmrglw RHS, <3,4,5,6>
+ 2309555443U, // <u,5,7,7>: Cost 3 vmrglw RHS, <1,6,5,7>
+ 1235814916U, // <u,5,7,u>: Cost 2 vmrglw RHS, <3,4,5,u>
+ 1567162470U, // <u,5,u,0>: Cost 2 vsldoi4 <3,u,5,u>, LHS
+ 1616557870U, // <u,5,u,1>: Cost 2 vsldoi8 <0,u,u,5>, LHS
+ 2690299781U, // <u,5,u,2>: Cost 3 vsldoi8 <0,u,u,5>, <u,2,3,0>
+ 1567165256U, // <u,5,u,3>: Cost 2 vsldoi4 <3,u,5,u>, <3,u,5,u>
+ 1567165750U, // <u,5,u,4>: Cost 2 vsldoi4 <3,u,5,u>, RHS
+ 296144182U, // <u,5,u,5>: Cost 1 vspltisw1 RHS
+ 1209281026U, // <u,5,u,6>: Cost 2 vmrglw LHS, <3,4,5,6>
+ 27705344U, // <u,5,u,7>: Cost 0 copy RHS
+ 27705344U, // <u,5,u,u>: Cost 0 copy RHS
+ 2705563648U, // <u,6,0,0>: Cost 3 vsldoi8 <3,4,u,6>, <0,0,0,0>
+ 1631821926U, // <u,6,0,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 2262462970U, // <u,6,0,2>: Cost 3 vmrghw <u,0,1,2>, <6,2,7,3>
+ 2646886941U, // <u,6,0,3>: Cost 3 vsldoi4 <4,u,6,0>, <3,4,u,6>
+ 2705563986U, // <u,6,0,4>: Cost 3 vsldoi8 <3,4,u,6>, <0,4,1,5>
+ 2316062652U, // <u,6,0,5>: Cost 3 vmrglw <5,6,7,0>, <5,4,6,5>
+ 2316137272U, // <u,6,0,6>: Cost 3 vmrglw <5,6,u,0>, <6,6,6,6>
+ 1215851830U, // <u,6,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 1215851831U, // <u,6,0,u>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 2634948710U, // <u,6,1,0>: Cost 3 vsldoi4 <2,u,6,1>, LHS
+ 2705564468U, // <u,6,1,1>: Cost 3 vsldoi8 <3,4,u,6>, <1,1,1,1>
+ 1189466618U, // <u,6,1,2>: Cost 2 vmrghw LHS, <6,2,7,3>
+ 2263208498U, // <u,6,1,3>: Cost 3 vmrghw LHS, <6,3,4,5>
+ 2693620843U, // <u,6,1,4>: Cost 3 vsldoi8 <1,4,u,6>, <1,4,u,6>
+ 2652868860U, // <u,6,1,5>: Cost 3 vsldoi4 <5,u,6,1>, <5,u,6,1>
+ 1189466936U, // <u,6,1,6>: Cost 2 vmrghw LHS, <6,6,6,6>
+ 1213869366U, // <u,6,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 1213869367U, // <u,6,1,u>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 2658844774U, // <u,6,2,0>: Cost 3 vsldoi4 <6,u,6,2>, LHS
+ 3771344465U, // <u,6,2,1>: Cost 4 vsldoi8 <2,1,u,6>, <2,1,u,6>
+ 1178554874U, // <u,6,2,2>: Cost 2 vmrghw <6,2,7,3>, <6,2,7,3>
+ 2698929907U, // <u,6,2,3>: Cost 3 vsldoi8 <2,3,u,6>, <2,3,u,6>
+ 2699593540U, // <u,6,2,4>: Cost 3 vsldoi8 <2,4,u,6>, <2,4,u,6>
+ 2700257173U, // <u,6,2,5>: Cost 3 vsldoi8 <2,5,u,6>, <2,5,u,6>
+ 2705565626U, // <u,6,2,6>: Cost 3 vsldoi8 <3,4,u,6>, <2,6,3,7>
+ 1226485046U, // <u,6,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 1226485047U, // <u,6,2,u>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 2705565846U, // <u,6,3,0>: Cost 3 vsldoi8 <3,4,u,6>, <3,0,1,2>
+ 2330756585U, // <u,6,3,1>: Cost 3 vmrglw LHS, <2,0,6,1>
+ 2330756829U, // <u,6,3,2>: Cost 3 vmrglw LHS, <2,3,6,2>
+ 2282981734U, // <u,6,3,3>: Cost 3 vmrglw LHS, <3,2,6,3>
+ 1631824413U, // <u,6,3,4>: Cost 2 vsldoi8 <3,4,u,6>, <3,4,u,6>
+ 2652885246U, // <u,6,3,5>: Cost 3 vsldoi4 <5,u,6,3>, <5,u,6,3>
+ 1257018168U, // <u,6,3,6>: Cost 2 vmrglw LHS, <6,6,6,6>
+ 135499062U, // <u,6,3,7>: Cost 1 vmrglw LHS, RHS
+ 135499063U, // <u,6,3,u>: Cost 1 vmrglw LHS, RHS
+ 2646917222U, // <u,6,4,0>: Cost 3 vsldoi4 <4,u,6,4>, LHS
+ 2217365931U, // <u,6,4,1>: Cost 3 vmrghw <0,4,1,5>, <6,1,7,5>
+ 2790167156U, // <u,6,4,2>: Cost 3 vsldoi12 <6,4,2,u>, <6,4,2,u>
+ 2646919709U, // <u,6,4,3>: Cost 3 vsldoi4 <4,u,6,4>, <3,4,u,6>
+ 2711538934U, // <u,6,4,4>: Cost 3 vsldoi8 <4,4,u,6>, <4,4,u,6>
+ 1631825206U, // <u,6,4,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 2316170040U, // <u,6,4,6>: Cost 3 vmrglw <5,6,u,4>, <6,6,6,6>
+ 1215884598U, // <u,6,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 1215884599U, // <u,6,4,u>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 2634981478U, // <u,6,5,0>: Cost 3 vsldoi4 <2,u,6,5>, LHS
+ 2266190247U, // <u,6,5,1>: Cost 3 vmrghw RHS, <6,1,7,1>
+ 1192448506U, // <u,6,5,2>: Cost 2 vmrghw RHS, <6,2,7,3>
+ 2266190386U, // <u,6,5,3>: Cost 3 vmrghw RHS, <6,3,4,5>
+ 2634984758U, // <u,6,5,4>: Cost 3 vsldoi4 <2,u,6,5>, RHS
+ 2652901632U, // <u,6,5,5>: Cost 3 vsldoi4 <5,u,6,5>, <5,u,6,5>
+ 1192448824U, // <u,6,5,6>: Cost 2 vmrghw RHS, <6,6,6,6>
+ 1213902134U, // <u,6,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1213902135U, // <u,6,5,u>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 1583808614U, // <u,6,6,0>: Cost 2 vsldoi4 <6,6,6,6>, LHS
+ 2322010445U, // <u,6,6,1>: Cost 3 vmrglw <6,6,6,6>, <6,0,6,1>
+ 2718839290U, // <u,6,6,2>: Cost 3 vsldoi8 <5,6,u,6>, <6,2,7,3>
+ 2670823965U, // <u,6,6,3>: Cost 3 vsldoi4 <u,u,6,6>, <3,4,u,6>
+ 1583811894U, // <u,6,6,4>: Cost 2 vsldoi4 <6,6,6,6>, RHS
+ 2724147961U, // <u,6,6,5>: Cost 3 vsldoi8 <6,5,u,6>, <6,5,u,6>
+ 363253046U, // <u,6,6,6>: Cost 1 vspltisw2 RHS
+ 1229172022U, // <u,6,6,7>: Cost 2 vmrglw <3,4,u,6>, RHS
+ 363253046U, // <u,6,6,u>: Cost 1 vspltisw2 RHS
+ 499458150U, // <u,6,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1573200692U, // <u,6,7,1>: Cost 2 vsldoi4 RHS, <1,1,1,1>
+ 1573201512U, // <u,6,7,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573202070U, // <u,6,7,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499461673U, // <u,6,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1573203972U, // <u,6,7,5>: Cost 2 vsldoi4 RHS, <5,5,5,5>
+ 1235817272U, // <u,6,7,6>: Cost 2 vmrglw RHS, <6,6,6,6>
+ 162073910U, // <u,6,7,7>: Cost 1 vmrglw RHS, RHS
+ 162073911U, // <u,6,7,u>: Cost 1 vmrglw RHS, RHS
+ 499466342U, // <u,6,u,0>: Cost 1 vsldoi4 RHS, LHS
+ 1631827758U, // <u,6,u,1>: Cost 2 vsldoi8 <3,4,u,6>, LHS
+ 1573209704U, // <u,6,u,2>: Cost 2 vsldoi4 RHS, <2,2,2,2>
+ 1573210262U, // <u,6,u,3>: Cost 2 vsldoi4 RHS, <3,0,1,2>
+ 499469866U, // <u,6,u,4>: Cost 1 vsldoi4 RHS, RHS
+ 1631828122U, // <u,6,u,5>: Cost 2 vsldoi8 <3,4,u,6>, RHS
+ 363253046U, // <u,6,u,6>: Cost 1 vspltisw2 RHS
+ 135540022U, // <u,6,u,7>: Cost 1 vmrglw LHS, RHS
+ 135540023U, // <u,6,u,u>: Cost 1 vmrglw LHS, RHS
+ 1638465536U, // <u,7,0,0>: Cost 2 vsldoi8 RHS, <0,0,0,0>
+ 564723814U, // <u,7,0,1>: Cost 1 vsldoi8 RHS, LHS
+ 2712207533U, // <u,7,0,2>: Cost 3 vsldoi8 RHS, <0,2,1,2>
+ 2712207612U, // <u,7,0,3>: Cost 3 vsldoi8 RHS, <0,3,1,0>
+ 1638465874U, // <u,7,0,4>: Cost 2 vsldoi8 RHS, <0,4,1,5>
+ 1579192580U, // <u,7,0,5>: Cost 2 vsldoi4 <5,u,7,0>, <5,u,7,0>
+ 2712207862U, // <u,7,0,6>: Cost 3 vsldoi8 RHS, <0,6,1,7>
+ 2316137282U, // <u,7,0,7>: Cost 3 vmrglw <5,6,u,0>, <6,6,7,7>
+ 564724381U, // <u,7,0,u>: Cost 1 vsldoi8 RHS, LHS
+ 1189467130U, // <u,7,1,0>: Cost 2 vmrghw LHS, <7,0,1,2>
+ 1638466356U, // <u,7,1,1>: Cost 2 vsldoi8 RHS, <1,1,1,1>
+ 1638466454U, // <u,7,1,2>: Cost 2 vsldoi8 RHS, <1,2,3,0>
+ 2311500282U, // <u,7,1,3>: Cost 3 vmrglw <4,u,u,1>, <6,2,7,3>
+ 1189467494U, // <u,7,1,4>: Cost 2 vmrghw LHS, <7,4,5,6>
+ 2712208495U, // <u,7,1,5>: Cost 3 vsldoi8 RHS, <1,5,0,1>
+ 2694956302U, // <u,7,1,6>: Cost 3 vsldoi8 <1,6,u,7>, <1,6,u,7>
+ 1189467756U, // <u,7,1,7>: Cost 2 vmrghw LHS, <7,7,7,7>
+ 1638466940U, // <u,7,1,u>: Cost 2 vsldoi8 RHS, <1,u,3,0>
+ 2712208829U, // <u,7,2,0>: Cost 3 vsldoi8 RHS, <2,0,1,2>
+ 2712208927U, // <u,7,2,1>: Cost 3 vsldoi8 RHS, <2,1,3,1>
+ 1638467176U, // <u,7,2,2>: Cost 2 vsldoi8 RHS, <2,2,2,2>
+ 1638467238U, // <u,7,2,3>: Cost 2 vsldoi8 RHS, <2,3,0,1>
+ 2712209165U, // <u,7,2,4>: Cost 3 vsldoi8 RHS, <2,4,2,5>
+ 2712209256U, // <u,7,2,5>: Cost 3 vsldoi8 RHS, <2,5,3,6>
+ 1627187175U, // <u,7,2,6>: Cost 2 vsldoi8 <2,6,u,7>, <2,6,u,7>
+ 2324116290U, // <u,7,2,7>: Cost 3 vmrglw <7,0,u,2>, <6,6,7,7>
+ 1628514441U, // <u,7,2,u>: Cost 2 vsldoi8 <2,u,u,7>, <2,u,u,7>
+ 1638467734U, // <u,7,3,0>: Cost 2 vsldoi8 RHS, <3,0,1,2>
+ 2712209638U, // <u,7,3,1>: Cost 3 vsldoi8 RHS, <3,1,1,1>
+ 2700929387U, // <u,7,3,2>: Cost 3 vsldoi8 <2,6,u,7>, <3,2,6,u>
+ 1638467996U, // <u,7,3,3>: Cost 2 vsldoi8 RHS, <3,3,3,3>
+ 1638468098U, // <u,7,3,4>: Cost 2 vsldoi8 RHS, <3,4,5,6>
+ 2712210002U, // <u,7,3,5>: Cost 3 vsldoi8 RHS, <3,5,5,5>
+ 1585189856U, // <u,7,3,6>: Cost 2 vsldoi4 <6,u,7,3>, <6,u,7,3>
+ 1257018178U, // <u,7,3,7>: Cost 2 vmrglw LHS, <6,6,7,7>
+ 1638468382U, // <u,7,3,u>: Cost 2 vsldoi8 RHS, <3,u,1,2>
+ 1638468498U, // <u,7,4,0>: Cost 2 vsldoi8 RHS, <4,0,5,1>
+ 2712210378U, // <u,7,4,1>: Cost 3 vsldoi8 RHS, <4,1,2,3>
+ 2712210485U, // <u,7,4,2>: Cost 3 vsldoi8 RHS, <4,2,5,2>
+ 2712210564U, // <u,7,4,3>: Cost 3 vsldoi8 RHS, <4,3,5,0>
+ 1638468816U, // <u,7,4,4>: Cost 2 vsldoi8 RHS, <4,4,4,4>
+ 564727112U, // <u,7,4,5>: Cost 1 vsldoi8 RHS, RHS
+ 2712210809U, // <u,7,4,6>: Cost 3 vsldoi8 RHS, <4,6,5,2>
+ 2712210888U, // <u,7,4,7>: Cost 3 vsldoi8 RHS, <4,7,5,0>
+ 564727337U, // <u,7,4,u>: Cost 1 vsldoi8 RHS, RHS
+ 1192449018U, // <u,7,5,0>: Cost 2 vmrghw RHS, <7,0,1,2>
+ 2714201743U, // <u,7,5,1>: Cost 3 vsldoi8 RHS, <5,1,0,1>
+ 2712211198U, // <u,7,5,2>: Cost 3 vsldoi8 RHS, <5,2,3,4>
+ 2311533050U, // <u,7,5,3>: Cost 3 vmrglw <4,u,u,5>, <6,2,7,3>
+ 1192449382U, // <u,7,5,4>: Cost 2 vmrghw RHS, <7,4,5,6>
+ 1638469636U, // <u,7,5,5>: Cost 2 vsldoi8 RHS, <5,5,5,5>
+ 1638469730U, // <u,7,5,6>: Cost 2 vsldoi8 RHS, <5,6,7,0>
+ 1192449644U, // <u,7,5,7>: Cost 2 vmrghw RHS, <7,7,7,7>
+ 1638469892U, // <u,7,5,u>: Cost 2 vsldoi8 RHS, <5,u,7,0>
+ 2712211745U, // <u,7,6,0>: Cost 3 vsldoi8 RHS, <6,0,1,2>
+ 2712211879U, // <u,7,6,1>: Cost 3 vsldoi8 RHS, <6,1,7,1>
+ 1638470138U, // <u,7,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 2712212018U, // <u,7,6,3>: Cost 3 vsldoi8 RHS, <6,3,4,5>
+ 2712212109U, // <u,7,6,4>: Cost 3 vsldoi8 RHS, <6,4,5,6>
+ 2712212203U, // <u,7,6,5>: Cost 3 vsldoi8 RHS, <6,5,7,1>
+ 1638470456U, // <u,7,6,6>: Cost 2 vsldoi8 RHS, <6,6,6,6>
+ 1638470478U, // <u,7,6,7>: Cost 2 vsldoi8 RHS, <6,7,0,1>
+ 1638470559U, // <u,7,6,u>: Cost 2 vsldoi8 RHS, <6,u,0,1>
+ 1235816546U, // <u,7,7,0>: Cost 2 vmrglw RHS, <5,6,7,0>
+ 2309558371U, // <u,7,7,1>: Cost 3 vmrglw RHS, <5,6,7,1>
+ 2641045434U, // <u,7,7,2>: Cost 3 vsldoi4 <3,u,7,7>, <2,6,3,7>
+ 1235816954U, // <u,7,7,3>: Cost 2 vmrglw RHS, <6,2,7,3>
+ 1235816550U, // <u,7,7,4>: Cost 2 vmrglw RHS, <5,6,7,4>
+ 2309558375U, // <u,7,7,5>: Cost 3 vmrglw RHS, <5,6,7,5>
+ 1585222628U, // <u,7,7,6>: Cost 2 vsldoi4 <6,u,7,7>, <6,u,7,7>
+ 430361910U, // <u,7,7,7>: Cost 1 vspltisw3 RHS
+ 430361910U, // <u,7,7,u>: Cost 1 vspltisw3 RHS
+ 1638471379U, // <u,7,u,0>: Cost 2 vsldoi8 RHS, <u,0,1,2>
+ 564729646U, // <u,7,u,1>: Cost 1 vsldoi8 RHS, LHS
+ 1638471557U, // <u,7,u,2>: Cost 2 vsldoi8 RHS, <u,2,3,0>
+ 1638471612U, // <u,7,u,3>: Cost 2 vsldoi8 RHS, <u,3,0,1>
+ 1638471743U, // <u,7,u,4>: Cost 2 vsldoi8 RHS, <u,4,5,6>
+ 564730010U, // <u,7,u,5>: Cost 1 vsldoi8 RHS, RHS
+ 1638471888U, // <u,7,u,6>: Cost 2 vsldoi8 RHS, <u,6,3,7>
+ 430361910U, // <u,7,u,7>: Cost 1 vspltisw3 RHS
+ 564730213U, // <u,7,u,u>: Cost 1 vsldoi8 RHS, LHS
+ 202162278U, // <u,u,0,0>: Cost 1 vspltisw0 LHS
+ 538189985U, // <u,u,0,1>: Cost 1 vsldoi8 LHS, LHS
+ 2685673645U, // <u,u,0,2>: Cost 3 vsldoi8 LHS, <0,2,1,2>
+ 1215848604U, // <u,u,0,3>: Cost 2 vmrglw <1,2,u,0>, LHS
+ 1611931986U, // <u,u,0,4>: Cost 2 vsldoi8 LHS, <0,4,1,5>
+ 1579266317U, // <u,u,0,5>: Cost 2 vsldoi4 <5,u,u,0>, <5,u,u,0>
+ 2289592861U, // <u,u,0,6>: Cost 3 vmrglw <1,2,u,0>, <3,4,u,6>
+ 1215851848U, // <u,u,0,7>: Cost 2 vmrglw <1,2,u,0>, RHS
+ 538190493U, // <u,u,0,u>: Cost 1 vsldoi8 LHS, LHS
+ 1549411025U, // <u,u,1,0>: Cost 2 vsldoi4 <0,u,u,1>, <0,u,u,1>
+ 115726126U, // <u,u,1,1>: Cost 1 vmrghw LHS, LHS
+ 604862254U, // <u,u,1,2>: Cost 1 vsldoi12 LHS, LHS
+ 1213866140U, // <u,u,1,3>: Cost 2 vmrglw <0,u,u,1>, LHS
+ 1549413686U, // <u,u,1,4>: Cost 2 vsldoi4 <0,u,u,1>, RHS
+ 115726490U, // <u,u,1,5>: Cost 1 vmrghw LHS, RHS
+ 1585247207U, // <u,u,1,6>: Cost 2 vsldoi4 <6,u,u,1>, <6,u,u,1>
+ 1213869384U, // <u,u,1,7>: Cost 2 vmrglw <0,u,u,1>, RHS
+ 604862308U, // <u,u,1,u>: Cost 1 vsldoi12 LHS, LHS
+ 1567334502U, // <u,u,2,0>: Cost 2 vsldoi4 <3,u,u,2>, LHS
+ 1190180654U, // <u,u,2,1>: Cost 2 vmrghw <u,2,3,0>, LHS
+ 336380006U, // <u,u,2,2>: Cost 1 vspltisw2 LHS
+ 835584U, // <u,u,2,3>: Cost 0 copy LHS
+ 1567337782U, // <u,u,2,4>: Cost 2 vsldoi4 <3,u,u,2>, RHS
+ 1190181018U, // <u,u,2,5>: Cost 2 vmrghw <u,2,3,0>, RHS
+ 1611933626U, // <u,u,2,6>: Cost 2 vsldoi8 LHS, <2,6,3,7>
+ 1226485064U, // <u,u,2,7>: Cost 2 vmrglw <3,0,u,2>, RHS
+ 835584U, // <u,u,2,u>: Cost 0 copy LHS
+ 475685587U, // <u,u,3,0>: Cost 1 vsldoi4 LHS, LHS
+ 1209239278U, // <u,u,3,1>: Cost 2 vmrglw LHS, <2,3,u,1>
+ 1209239765U, // <u,u,3,2>: Cost 2 vmrglw LHS, <3,0,u,2>
+ 135495836U, // <u,u,3,3>: Cost 1 vmrglw LHS, LHS
+ 475688246U, // <u,u,3,4>: Cost 1 vsldoi4 LHS, RHS
+ 1209239282U, // <u,u,3,5>: Cost 2 vmrglw LHS, <2,3,u,5>
+ 1209240093U, // <u,u,3,6>: Cost 2 vmrglw LHS, <3,4,u,6>
+ 135499080U, // <u,u,3,7>: Cost 1 vmrglw LHS, RHS
+ 135495841U, // <u,u,3,u>: Cost 1 vmrglw LHS, LHS
+ 1555406950U, // <u,u,4,0>: Cost 2 vsldoi4 <1,u,u,4>, LHS
+ 1555408301U, // <u,u,4,1>: Cost 2 vsldoi4 <1,u,u,4>, <1,u,u,4>
+ 2289625301U, // <u,u,4,2>: Cost 3 vmrglw <1,2,u,4>, <3,0,u,2>
+ 1215881372U, // <u,u,4,3>: Cost 2 vmrglw <1,2,u,4>, LHS
+ 229035318U, // <u,u,4,4>: Cost 1 vspltisw0 RHS
+ 538193206U, // <u,u,4,5>: Cost 1 vsldoi8 LHS, RHS
+ 2289625629U, // <u,u,4,6>: Cost 3 vmrglw <1,2,u,4>, <3,4,u,6>
+ 1215884616U, // <u,u,4,7>: Cost 2 vmrglw <1,2,u,4>, RHS
+ 538193449U, // <u,u,4,u>: Cost 1 vsldoi8 LHS, RHS
+ 1549443797U, // <u,u,5,0>: Cost 2 vsldoi4 <0,u,u,5>, <0,u,u,5>
+ 118708014U, // <u,u,5,1>: Cost 1 vmrghw RHS, LHS
+ 1561389191U, // <u,u,5,2>: Cost 2 vsldoi4 <2,u,u,5>, <2,u,u,5>
+ 1213898908U, // <u,u,5,3>: Cost 2 vmrglw <0,u,u,5>, LHS
+ 1549446454U, // <u,u,5,4>: Cost 2 vsldoi4 <0,u,u,5>, RHS
+ 118708378U, // <u,u,5,5>: Cost 1 vmrghw RHS, RHS
+ 604862618U, // <u,u,5,6>: Cost 1 vsldoi12 LHS, RHS
+ 1213902152U, // <u,u,5,7>: Cost 2 vmrglw <0,u,u,5>, RHS
+ 604862636U, // <u,u,5,u>: Cost 1 vsldoi12 LHS, RHS
+ 1567367270U, // <u,u,6,0>: Cost 2 vsldoi4 <3,u,u,6>, LHS
+ 1192892206U, // <u,u,6,1>: Cost 2 vmrghw <u,6,3,7>, LHS
+ 1638478330U, // <u,u,6,2>: Cost 2 vsldoi8 RHS, <6,2,7,3>
+ 1679046864U, // <u,u,6,3>: Cost 2 vsldoi12 LHS, <u,6,3,7>
+ 1567370550U, // <u,u,6,4>: Cost 2 vsldoi4 <3,u,u,6>, RHS
+ 1192892570U, // <u,u,6,5>: Cost 2 vmrghw <u,6,3,7>, RHS
+ 363253046U, // <u,u,6,6>: Cost 1 vspltisw2 RHS
+ 27705344U, // <u,u,6,7>: Cost 0 copy RHS
+ 27705344U, // <u,u,6,u>: Cost 0 copy RHS
+ 499605606U, // <u,u,7,0>: Cost 1 vsldoi4 RHS, LHS
+ 1235812425U, // <u,u,7,1>: Cost 2 vmrglw RHS, <0,0,u,1>
+ 1561405577U, // <u,u,7,2>: Cost 2 vsldoi4 <2,u,u,7>, <2,u,u,7>
+ 162070684U, // <u,u,7,3>: Cost 1 vmrglw RHS, LHS
+ 499609147U, // <u,u,7,4>: Cost 1 vsldoi4 RHS, RHS
+ 1235812753U, // <u,u,7,5>: Cost 2 vmrglw RHS, <0,4,u,5>
+ 1235814941U, // <u,u,7,6>: Cost 2 vmrglw RHS, <3,4,u,6>
+ 162073928U, // <u,u,7,7>: Cost 1 vmrglw RHS, RHS
+ 162070689U, // <u,u,7,u>: Cost 1 vmrglw RHS, LHS
+ 475726552U, // <u,u,u,0>: Cost 1 vsldoi4 LHS, LHS
+ 538195758U, // <u,u,u,1>: Cost 1 vsldoi8 LHS, LHS
+ 604862821U, // <u,u,u,2>: Cost 1 vsldoi12 LHS, LHS
+ 835584U, // <u,u,u,3>: Cost 0 copy LHS
+ 475729206U, // <u,u,u,4>: Cost 1 vsldoi4 LHS, RHS
+ 538196122U, // <u,u,u,5>: Cost 1 vsldoi8 LHS, RHS
+ 604862861U, // <u,u,u,6>: Cost 1 vsldoi12 LHS, RHS
+ 27705344U, // <u,u,u,7>: Cost 0 copy RHS
+ 835584U, // <u,u,u,u>: Cost 0 copy LHS
+ 0
+};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
new file mode 100644
index 000000000000..9895ee6267aa
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -0,0 +1,1007 @@
+//===-- PPCRegisterInfo.cpp - PowerPC Register Information ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCRegisterInfo.h"
+#include "PPC.h"
+#include "PPCFrameLowering.h"
+#include "PPCInstrBuilder.h"
+#include "PPCMachineFunctionInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "reginfo"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
+static cl::opt<bool>
+EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+static cl::opt<bool>
+AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
+ cl::desc("Force the use of a base pointer in every function"));
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
+ : PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
+ ST.isPPC64() ? 0 : 1,
+ ST.isPPC64() ? 0 : 1),
+ Subtarget(ST) {
+ ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
+ ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
+ ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
+ ImmToIdxMap[PPC::LWZ] = PPC::LWZX; ImmToIdxMap[PPC::LWA] = PPC::LWAX;
+ ImmToIdxMap[PPC::LFS] = PPC::LFSX; ImmToIdxMap[PPC::LFD] = PPC::LFDX;
+ ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
+ ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
+ ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+ ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32;
+
+ // 64-bit
+ ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
+ ImmToIdxMap[PPC::LHZ8] = PPC::LHZX8; ImmToIdxMap[PPC::LWZ8] = PPC::LWZX8;
+ ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
+ ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
+ ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
+ const {
+ // Note that PPCInstrInfo::FoldImmediate also directly uses this Kind value
+ // when it checks for ZERO folding.
+ if (Kind == 1) {
+ if (Subtarget.isPPC64())
+ return &PPC::G8RC_NOX0RegClass;
+ return &PPC::GPRC_NOR0RegClass;
+ }
+
+ if (Subtarget.isPPC64())
+ return &PPC::G8RCRegClass;
+ return &PPC::GPRCRegClass;
+}
+
+const MCPhysReg*
+PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_Darwin64_Altivec_SaveList :
+ CSR_Darwin64_SaveList) :
+ (Subtarget.hasAltivec() ?
+ CSR_Darwin32_Altivec_SaveList :
+ CSR_Darwin32_SaveList);
+
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_SVR464_Altivec_SaveList :
+ CSR_SVR464_SaveList) :
+ (Subtarget.hasAltivec() ?
+ CSR_SVR432_Altivec_SaveList :
+ CSR_SVR432_SaveList);
+}
+
+const uint32_t*
+PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (Subtarget.isDarwinABI())
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_Darwin64_Altivec_RegMask :
+ CSR_Darwin64_RegMask) :
+ (Subtarget.hasAltivec() ?
+ CSR_Darwin32_Altivec_RegMask :
+ CSR_Darwin32_RegMask);
+
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_SVR464_Altivec_RegMask :
+ CSR_SVR464_RegMask) :
+ (Subtarget.hasAltivec() ?
+ CSR_SVR432_Altivec_RegMask :
+ CSR_SVR432_RegMask);
+}
+
+const uint32_t*
+PPCRegisterInfo::getNoPreservedMask() const {
+ return CSR_NoRegs_RegMask;
+}
+
+BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+
+ // The ZERO register is not really a register, but the representation of r0
+ // when used in instructions that treat r0 as the constant 0.
+ Reserved.set(PPC::ZERO);
+ Reserved.set(PPC::ZERO8);
+
+ // The FP register is also not really a register, but is the representation
+ // of the frame pointer register used by ISD::FRAMEADDR.
+ Reserved.set(PPC::FP);
+ Reserved.set(PPC::FP8);
+
+ // The BP register is also not really a register, but is the representation
+ // of the base pointer register used by setjmp.
+ Reserved.set(PPC::BP);
+ Reserved.set(PPC::BP8);
+
+ // The counter registers must be reserved so that counter-based loops can
+ // be correctly formed (and the mtctr instructions are not DCE'd).
+ Reserved.set(PPC::CTR);
+ Reserved.set(PPC::CTR8);
+
+ Reserved.set(PPC::R1);
+ Reserved.set(PPC::LR);
+ Reserved.set(PPC::LR8);
+ Reserved.set(PPC::RM);
+
+ if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
+ Reserved.set(PPC::VRSAVE);
+
+ // The SVR4 ABI reserves r2 and r13
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::R2); // System-reserved register
+ Reserved.set(PPC::R13); // Small Data Area pointer register
+ }
+
+ // On PPC64, r13 is the thread pointer. Never allocate this register.
+ if (Subtarget.isPPC64()) {
+ Reserved.set(PPC::R13);
+
+ Reserved.set(PPC::X1);
+ Reserved.set(PPC::X13);
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::X31);
+
+ if (hasBasePointer(MF))
+ Reserved.set(PPC::X30);
+
+ // The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
+ if (Subtarget.isSVR4ABI()) {
+ Reserved.set(PPC::X2);
+ }
+ }
+
+ if (PPCFI->needsFP(MF))
+ Reserved.set(PPC::R31);
+
+ if (hasBasePointer(MF)) {
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ Reserved.set(PPC::R29);
+ else
+ Reserved.set(PPC::R30);
+ }
+
+ if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ Reserved.set(PPC::R30);
+
+ // Reserve Altivec registers when Altivec is unavailable.
+ if (!Subtarget.hasAltivec())
+ for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(),
+ IE = PPC::VRRCRegClass.end(); I != IE; ++I)
+ Reserved.set(*I);
+
+ return Reserved;
+}
+
+unsigned
+PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const unsigned DefaultSafety = 1;
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case PPC::G8RC_NOX0RegClassID:
+ case PPC::GPRC_NOR0RegClassID:
+ case PPC::G8RCRegClassID:
+ case PPC::GPRCRegClassID: {
+ unsigned FP = TFI->hasFP(MF) ? 1 : 0;
+ return 32 - FP - DefaultSafety;
+ }
+ case PPC::F8RCRegClassID:
+ case PPC::F4RCRegClassID:
+ case PPC::VRRCRegClassID:
+ case PPC::VFRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSHRCRegClassID:
+ return 32 - DefaultSafety;
+ case PPC::VSRCRegClassID:
+ case PPC::VSFRCRegClassID:
+ return 64 - DefaultSafety;
+ case PPC::CRRCRegClassID:
+ return 8 - DefaultSafety;
+ }
+}
+
+const TargetRegisterClass*
+PPCRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC)const {
+ if (Subtarget.hasVSX()) {
+ // With VSX, we can inflate various sub-register classes to the full VSX
+ // register set.
+
+ if (RC == &PPC::F8RCRegClass)
+ return &PPC::VSFRCRegClass;
+ else if (RC == &PPC::VRRCRegClass)
+ return &PPC::VSRCRegClass;
+ }
+
+ return TargetRegisterInfo::getLargestLegalSuperClass(RC);
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+/// lowerDynamicAlloc - Generate the code for allocating an object in the
+/// current frame. The sequence of code with be in the general form
+///
+/// addi R0, SP, \#frameSize ; get the address of the previous frame
+/// stwxu R0, SP, Rnegsize ; add and update the SP with the negated size
+/// addi Rnew, SP, \#maxCalFrameSize ; get the top of the allocation
+///
+void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ // Determine whether 64-bit pointers are used.
+ bool LP64 = Subtarget.isPPC64();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // Get the maximum call stack size.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ // Get the total frame size.
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get stack alignments.
+ unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned MaxAlign = MFI->getMaxAlignment();
+ assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
+ "Maximum call-frame size not sufficiently aligned");
+
+ // Determine the previous frame's address. If FrameSize can't be
+ // represented as 16 bits or we need special alignment, then we load the
+ // previous frame's address from 0(SP). Why not do an addis of the hi?
+ // Because R0 is our only safe tmp register and addi/addis treat R0 as zero.
+ // Constructing the constant and adding would take 3 instructions.
+ // Fortunately, a frame greater than 32K is rare.
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
+ .addReg(PPC::R31)
+ .addImm(FrameSize);
+ } else if (LP64) {
+ BuildMI(MBB, II, dl, TII.get(PPC::LD), Reg)
+ .addImm(0)
+ .addReg(PPC::X1);
+ } else {
+ BuildMI(MBB, II, dl, TII.get(PPC::LWZ), Reg)
+ .addImm(0)
+ .addReg(PPC::R1);
+ }
+
+ bool KillNegSizeReg = MI.getOperand(1).isKill();
+ unsigned NegSizeReg = MI.getOperand(1).getReg();
+
+ // Grow the stack and update the stack pointer link, then determine the
+ // address of new allocated space.
+ if (LP64) {
+ if (MaxAlign > TargetAlign) {
+ unsigned UnalNegSizeReg = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
+
+ // Unfortunately, there is no andi, only andi., and we can't insert that
+ // here because we might clobber cr0 while it is live.
+ BuildMI(MBB, II, dl, TII.get(PPC::LI8), NegSizeReg)
+ .addImm(~(MaxAlign-1));
+
+ unsigned NegSizeReg1 = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
+ BuildMI(MBB, II, dl, TII.get(PPC::AND8), NegSizeReg)
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
+ KillNegSizeReg = true;
+ }
+
+ BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::X1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
+ } else {
+ if (MaxAlign > TargetAlign) {
+ unsigned UnalNegSizeReg = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
+
+ // Unfortunately, there is no andi, only andi., and we can't insert that
+ // here because we might clobber cr0 while it is live.
+ BuildMI(MBB, II, dl, TII.get(PPC::LI), NegSizeReg)
+ .addImm(~(MaxAlign-1));
+
+ unsigned NegSizeReg1 = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
+ BuildMI(MBB, II, dl, TII.get(PPC::AND), NegSizeReg)
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
+ KillNegSizeReg = true;
+ }
+
+ BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
+ .addReg(Reg, RegState::Kill)
+ .addReg(PPC::R1)
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
+ }
+
+ // Discard the DYNALLOC instruction.
+ MBB.erase(II);
+}
+
+/// lowerCRSpilling - Generate the code for spilling a CR register. Instead of
+/// reserving a whole register (R0), we scrounge for one here. This generates
+/// code like this:
+///
+/// mfcr rA ; Move the conditional register into GPR rA.
+/// rlwinm rA, rA, SB, 0, 31 ; Shift the bits left so they are in CR0's slot.
+/// stw rA, FI ; Store rA to the frame.
+///
+void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CR <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ // We need to store the CR in the low 4-bits of the saved value. First, issue
+ // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ // If the saved register wasn't CR0, shift the bits left so that they are in
+ // CR0's slot.
+ if (SrcReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ // rlwinm rA, rA, ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg) * 4)
+ .addImm(0)
+ .addImm(31);
+ }
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CR <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CR does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ // If the reloaded register isn't CR0, shift the bits right so that they are
+ // in the right CR's slot.
+ if (DestReg != PPC::CR0) {
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ unsigned ShiftBits = getEncodingValue(DestReg)*4;
+ // rlwinm r11, r11, 32-ShiftBits, 0, 31.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill).addImm(32-ShiftBits).addImm(0)
+ .addImm(31);
+ }
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), DestReg)
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+static unsigned getCRFromCRBit(unsigned SrcReg) {
+ unsigned Reg = 0;
+ if (SrcReg == PPC::CR0LT || SrcReg == PPC::CR0GT ||
+ SrcReg == PPC::CR0EQ || SrcReg == PPC::CR0UN)
+ Reg = PPC::CR0;
+ else if (SrcReg == PPC::CR1LT || SrcReg == PPC::CR1GT ||
+ SrcReg == PPC::CR1EQ || SrcReg == PPC::CR1UN)
+ Reg = PPC::CR1;
+ else if (SrcReg == PPC::CR2LT || SrcReg == PPC::CR2GT ||
+ SrcReg == PPC::CR2EQ || SrcReg == PPC::CR2UN)
+ Reg = PPC::CR2;
+ else if (SrcReg == PPC::CR3LT || SrcReg == PPC::CR3GT ||
+ SrcReg == PPC::CR3EQ || SrcReg == PPC::CR3UN)
+ Reg = PPC::CR3;
+ else if (SrcReg == PPC::CR4LT || SrcReg == PPC::CR4GT ||
+ SrcReg == PPC::CR4EQ || SrcReg == PPC::CR4UN)
+ Reg = PPC::CR4;
+ else if (SrcReg == PPC::CR5LT || SrcReg == PPC::CR5GT ||
+ SrcReg == PPC::CR5EQ || SrcReg == PPC::CR5UN)
+ Reg = PPC::CR5;
+ else if (SrcReg == PPC::CR6LT || SrcReg == PPC::CR6GT ||
+ SrcReg == PPC::CR6EQ || SrcReg == PPC::CR6UN)
+ Reg = PPC::CR6;
+ else if (SrcReg == PPC::CR7LT || SrcReg == PPC::CR7GT ||
+ SrcReg == PPC::CR7EQ || SrcReg == PPC::CR7UN)
+ Reg = PPC::CR7;
+
+ assert(Reg != 0 && "Invalid CR bit register");
+ return Reg;
+}
+
+void PPCRegisterInfo::lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_CRBIT <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::KILL),
+ getCRFromCRBit(SrcReg))
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
+ .addReg(getCRFromCRBit(SrcReg));
+
+ // If the saved register wasn't CR0LT, shift the bits left so that the bit to
+ // store is the first one. Mask all but that bit.
+ unsigned Reg1 = Reg;
+ Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+
+ // rlwinm rA, rA, ShiftBits, 0, 0.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWINM8 : PPC::RLWINM), Reg)
+ .addReg(Reg1, RegState::Kill)
+ .addImm(getEncodingValue(SrcReg))
+ .addImm(0).addImm(0);
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_CRBIT <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ bool LP64 = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_CRBIT does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::LWZ8 : PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(TargetOpcode::IMPLICIT_DEF), DestReg);
+
+ unsigned RegO = MF.getRegInfo().createVirtualRegister(LP64 ? G8RC : GPRC);
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), RegO)
+ .addReg(getCRFromCRBit(DestReg));
+
+ unsigned ShiftBits = getEncodingValue(DestReg);
+ // rlwimi r11, r10, 32-ShiftBits, ..., ...
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::RLWIMI8 : PPC::RLWIMI), RegO)
+ .addReg(RegO, RegState::Kill).addReg(Reg, RegState::Kill)
+ .addImm(ShiftBits ? 32-ShiftBits : 0)
+ .addImm(ShiftBits).addImm(ShiftBits);
+
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF),
+ getCRFromCRBit(DestReg))
+ .addReg(RegO, RegState::Kill)
+ // Make sure we have a use dependency all the way through this
+ // sequence of instructions. We can't have the other bits in the CR
+ // modified in between the mfocrf and the mtocrf.
+ .addReg(getCRFromCRBit(DestReg), RegState::Implicit);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; SPILL_VRSAVE <SrcReg>, <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned SrcReg = MI.getOperand(0).getReg();
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MFVRSAVEv), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ .addReg(Reg, RegState::Kill),
+ FrameIndex);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const {
+ // Get the instruction.
+ MachineInstr &MI = *II; // ; <DestReg> = RESTORE_VRSAVE <offset>
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(GPRC);
+ unsigned DestReg = MI.getOperand(0).getReg();
+ assert(MI.definesRegister(DestReg) &&
+ "RESTORE_VRSAVE does not define its destination");
+
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::LWZ),
+ Reg), FrameIndex);
+
+ BuildMI(MBB, II, dl, TII.get(PPC::MTVRSAVEv), DestReg)
+ .addReg(Reg, RegState::Kill);
+
+ // Discard the pseudo instruction.
+ MBB.erase(II);
+}
+
+bool
+PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
+ unsigned Reg, int &FrameIdx) const {
+
+ // For the nonvolatile condition registers (CR2, CR3, CR4) in an SVR4
+ // ABI, return true to prevent allocating an additional frame slot.
+ // For 64-bit, the CR save area is at SP+8; the value of FrameIdx = 0
+ // is arbitrary and will be subsequently ignored. For 32-bit, we have
+ // previously created the stack slot if needed, so return its FrameIdx.
+ if (Subtarget.isSVR4ABI() && PPC::CR2 <= Reg && Reg <= PPC::CR4) {
+ if (Subtarget.isPPC64())
+ FrameIdx = 0;
+ else {
+ const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ FrameIdx = FI->getCRSpillFrameIndex();
+ }
+ return true;
+ }
+ return false;
+}
+
+// Figure out if the offset in the instruction must be a multiple of 4.
+// This is true for instructions like "STD".
+static bool usesIXAddr(const MachineInstr &MI) {
+ unsigned OpC = MI.getOpcode();
+
+ switch (OpC) {
+ default:
+ return false;
+ case PPC::LWA:
+ case PPC::LWA_32:
+ case PPC::LD:
+ case PPC::STD:
+ return true;
+ }
+}
+
+// Return the OffsetOperandNo given the FIOperandNum (and the instruction).
+static unsigned getOffsetONFromFION(const MachineInstr &MI,
+ unsigned FIOperandNum) {
+ // Take into account whether it's an add or mem instruction
+ unsigned OffsetOperandNo = (FIOperandNum == 2) ? 1 : 2;
+ if (MI.isInlineAsm())
+ OffsetOperandNo = FIOperandNum-1;
+
+ return OffsetOperandNo;
+}
+
+void
+PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ // Get the instruction.
+ MachineInstr &MI = *II;
+ // Get the instruction's basic block.
+ MachineBasicBlock &MBB = *MI.getParent();
+ // Get the basic block's function.
+ MachineFunction &MF = *MBB.getParent();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ // Get the frame info.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
+
+ // Get the frame index.
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+
+ // Get the frame pointer save index. Users of this index are primarily
+ // DYNALLOC instructions.
+ PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
+ int FPSI = FI->getFramePointerSaveIndex();
+ // Get the instruction opcode.
+ unsigned OpC = MI.getOpcode();
+
+ // Special case for dynamic alloca.
+ if (FPSI && FrameIndex == FPSI &&
+ (OpC == PPC::DYNALLOC || OpC == PPC::DYNALLOC8)) {
+ lowerDynamicAlloc(II);
+ return;
+ }
+
+ // Special case for pseudo-ops SPILL_CR and RESTORE_CR, etc.
+ if (OpC == PPC::SPILL_CR) {
+ lowerCRSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CR) {
+ lowerCRRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_CRBIT) {
+ lowerCRBitSpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_CRBIT) {
+ lowerCRBitRestore(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::SPILL_VRSAVE) {
+ lowerVRSAVESpilling(II, FrameIndex);
+ return;
+ } else if (OpC == PPC::RESTORE_VRSAVE) {
+ lowerVRSAVERestore(II, FrameIndex);
+ return;
+ }
+
+ // Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
+ MI.getOperand(FIOperandNum).ChangeToRegister(
+ FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
+
+ // Figure out if the offset in the instruction is shifted right two bits.
+ bool isIXAddr = usesIXAddr(MI);
+
+ // If the instruction is not present in ImmToIdxMap, then it has no immediate
+ // form (and must be r+r).
+ bool noImmForm = !MI.isInlineAsm() && !ImmToIdxMap.count(OpC);
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+
+ // If we're not using a Frame Pointer that has been set to the value of the
+ // SP before having the stack size subtracted from it, then add the stack size
+ // to Offset to get the correct offset.
+ // Naked functions have stack size 0, although getStackSize may not reflect that
+ // because we didn't call all the pieces that compute it for naked functions.
+ if (!MF.getFunction()->getAttributes().
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) {
+ if (!(hasBasePointer(MF) && FrameIndex < 0))
+ Offset += MFI->getStackSize();
+ }
+
+ // If we can, encode the offset directly into the instruction. If this is a
+ // normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
+ // this is a PPC64 "ix" instruction, only a 16-bit value with the low two bits
+ // clear can be encoded. This is extremely uncommon, because normally you
+ // only "std" to a stack slot that is at least 4-byte aligned, but it can
+ // happen in invalid code.
+ assert(OpC != PPC::DBG_VALUE &&
+ "This should be handle in a target independent way");
+ if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // The offset doesn't fit into a single register, scavenge one to build the
+ // offset in.
+
+ bool is64Bit = Subtarget.isPPC64();
+ const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
+ const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
+ const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
+ unsigned SRegHi = MF.getRegInfo().createVirtualRegister(RC),
+ SReg = MF.getRegInfo().createVirtualRegister(RC);
+
+ // Insert a set of rA with the full offset value before the ld, st, or add
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::LIS8 : PPC::LIS), SRegHi)
+ .addImm(Offset >> 16);
+ BuildMI(MBB, II, dl, TII.get(is64Bit ? PPC::ORI8 : PPC::ORI), SReg)
+ .addReg(SRegHi, RegState::Kill)
+ .addImm(Offset);
+
+ // Convert into indexed form of the instruction:
+ //
+ // sth 0:rA, 1:imm 2:(rB) ==> sthx 0:rA, 2:rB, 1:r0
+ // addi 0:rA 1:rB, 2, imm ==> add 0:rA, 1:rB, 2:r0
+ unsigned OperandBase;
+
+ if (noImmForm)
+ OperandBase = 1;
+ else if (OpC != TargetOpcode::INLINEASM) {
+ assert(ImmToIdxMap.count(OpC) &&
+ "No indexed form of load or store available!");
+ unsigned NewOpcode = ImmToIdxMap.find(OpC)->second;
+ MI.setDesc(TII.get(NewOpcode));
+ OperandBase = 1;
+ } else {
+ OperandBase = OffsetOperandNo;
+ }
+
+ unsigned StackReg = MI.getOperand(FIOperandNum).getReg();
+ MI.getOperand(OperandBase).ChangeToRegister(StackReg, false);
+ MI.getOperand(OperandBase + 1).ChangeToRegister(SReg, false, false, true);
+}
+
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ if (!Subtarget.isPPC64())
+ return TFI->hasFP(MF) ? PPC::R31 : PPC::R1;
+ else
+ return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
+}
+
+unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ if (!hasBasePointer(MF))
+ return getFrameRegister(MF);
+
+ if (Subtarget.isPPC64())
+ return PPC::X30;
+
+ if (Subtarget.isSVR4ABI() &&
+ MF.getTarget().getRelocationModel() == Reloc::PIC_)
+ return PPC::R29;
+
+ return PPC::R30;
+}
+
+bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ if (!EnableBasePointer)
+ return false;
+ if (AlwaysBasePointer)
+ return true;
+
+ // If we need to realign the stack, then the stack pointer can no longer
+ // serve as an offset into the caller's stack space. As a result, we need a
+ // base pointer.
+ return needsStackRealignment(MF);
+}
+
+bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+
+ return true;
+}
+
+bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
+}
+
+/// Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool PPCRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+ assert(Offset < 0 && "Local offset must be negative");
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores that have
+ // an r+i form. Return false for everything else.
+ unsigned OpC = MI->getOpcode();
+ if (!ImmToIdxMap.count(OpC))
+ return false;
+
+ // Don't generate a new virtual base register just to add zero to it.
+ if ((OpC == PPC::ADDI || OpC == PPC::ADDI8) &&
+ MI->getOperand(2).getImm() == 0)
+ return false;
+
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
+
+ const PPCFrameLowering *PPCFI =
+ static_cast<const PPCFrameLowering*>(MF.getTarget().getFrameLowering());
+ unsigned StackEst =
+ PPCFI->determineFrameLayout(MF, false, true);
+
+ // If we likely don't need a stack frame, then we probably don't need a
+ // virtual base register either.
+ if (!StackEst)
+ return false;
+
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relating to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset += StackEst;
+
+ // The frame pointer will point to the end of the stack, so estimate the
+ // offset as the difference between the object offset and the FP location.
+ return !isFrameOffsetLegal(MI, Offset);
+}
+
+/// Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void PPCRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const {
+ unsigned ADDriOpc = Subtarget.isPPC64() ? PPC::ADDI8 : PPC::ADDI;
+
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MachineFunction &MF = *MBB->getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
+
+ BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx).addImm(Offset);
+}
+
+void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const {
+ unsigned FIOperandNum = 0;
+ while (!MI.getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+ unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
+ MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ const MCInstrDesc &MCID = MI.getDesc();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.constrainRegClass(BaseReg,
+ TII.getRegClass(MCID, FIOperandNum, this, MF));
+}
+
+bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ unsigned FIOperandNum = 0;
+ while (!MI->getOperand(FIOperandNum).isFI()) {
+ ++FIOperandNum;
+ assert(FIOperandNum < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
+ Offset += MI->getOperand(OffsetOperandNo).getImm();
+
+ return MI->getOpcode() == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
+ (isInt<16>(Offset) && (!usesIXAddr(*MI) || (Offset & 3) == 0));
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
new file mode 100644
index 000000000000..13a35f6309d4
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -0,0 +1,113 @@
+//===-- PPCRegisterInfo.h - PowerPC Register Information Impl ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the PowerPC implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPC32_REGISTERINFO_H
+#define POWERPC32_REGISTERINFO_H
+
+#include "PPC.h"
+#include "llvm/ADT/DenseMap.h"
+
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
+namespace llvm {
+class PPCSubtarget;
+class TargetInstrInfo;
+class Type;
+
+class PPCRegisterInfo : public PPCGenRegisterInfo {
+ DenseMap<unsigned, unsigned> ImmToIdxMap;
+ const PPCSubtarget &Subtarget;
+public:
+ PPCRegisterInfo(const PPCSubtarget &SubTarget);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override;
+
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const override;
+
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
+
+ /// Code Generation virtual methods...
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction* MF =nullptr) const override;
+ const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
+ const uint32_t *getNoPreservedMask() const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
+
+ /// We require the register scavenger.
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override {
+ return true;
+ }
+
+ void lowerDynamicAlloc(MachineBasicBlock::iterator II) const;
+ void lowerCRSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRBitSpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerCRBitRestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVESpilling(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+ void lowerVRSAVERestore(MachineBasicBlock::iterator II,
+ unsigned FrameIndex) const;
+
+ bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
+ int &FrameIdx) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+
+ // Support for virtual base registers.
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg, int FrameIdx,
+ int64_t Offset) const override;
+ void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const override;
+ bool isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const override;
+
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
+
+ // Base pointer (stack realignment) support.
+ unsigned getBaseRegister(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
new file mode 100644
index 000000000000..b3d145b2cc49
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -0,0 +1,314 @@
+//===-- PPCRegisterInfo.td - The PowerPC Register File -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex<1>;
+def sub_gt : SubRegIndex<1, 1>;
+def sub_eq : SubRegIndex<1, 2>;
+def sub_un : SubRegIndex<1, 3>;
+def sub_32 : SubRegIndex<32>;
+def sub_64 : SubRegIndex<64>;
+def sub_128 : SubRegIndex<128>;
+}
+
+
+class PPCReg<string n> : Register<n> {
+ let Namespace = "PPC";
+}
+
+// We identify all our registers with a 5-bit ID, for consistency's sake.
+
+// GPR - One of the 32 32-bit general-purpose registers
+class GPR<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// GP8 - One of the 32 64-bit general-purpose registers
+class GP8<GPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
+}
+
+// SPR - One of the 32-bit special-purpose registers
+class SPR<bits<10> num, string n> : PPCReg<n> {
+ let HWEncoding{9-0} = num;
+}
+
+// FPR - One of the 32 64-bit floating-point registers
+class FPR<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// VF - One of the 32 64-bit floating-point subregisters of the vector
+// registers (used by VSX).
+class VF<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+ let HWEncoding{5} = 1;
+}
+
+// VR - One of the 32 128-bit vector registers
+class VR<VF SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 0;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRL - One of the 32 128-bit VSX registers that overlap with the scalar
+// floating-point registers.
+class VSRL<FPR SubReg, string n> : PPCReg<n> {
+ let HWEncoding = SubReg.HWEncoding;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_64];
+}
+
+// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
+// registers.
+class VSRH<VR SubReg, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
+ let HWEncoding{5} = 1;
+ let SubRegs = [SubReg];
+ let SubRegIndices = [sub_128];
+}
+
+// CR - One of the 8 4-bit condition registers
+class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
+ let HWEncoding{2-0} = num;
+ let SubRegs = subregs;
+}
+
+// CRBIT - One of the 32 1-bit condition register fields
+class CRBIT<bits<5> num, string n> : PPCReg<n> {
+ let HWEncoding{4-0} = num;
+}
+
+// General-purpose registers
+foreach Index = 0-31 in {
+ def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
+}
+
+// 64-bit General-purpose registers
+foreach Index = 0-31 in {
+ def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
+ DwarfRegNum<[Index, -2]>;
+}
+
+// Floating-point registers
+foreach Index = 0-31 in {
+ def F#Index : FPR<Index, "f"#Index>,
+ DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
+}
+
+// Floating-point vector subregisters (for VSX)
+foreach Index = 0-31 in {
+ def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+}
+
+// Vector registers
+foreach Index = 0-31 in {
+ def V#Index : VR<!cast<VF>("VF"#Index), "v"#Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
+}
+
+// VSX registers
+foreach Index = 0-31 in {
+ def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
+ DwarfRegAlias<!cast<FPR>("F"#Index)>;
+}
+foreach Index = 0-31 in {
+ def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
+ DwarfRegAlias<!cast<VR>("V"#Index)>;
+}
+
+// The reprsentation of r0 when treated as the constant 0.
+def ZERO : GPR<0, "0">;
+def ZERO8 : GP8<ZERO, "0">;
+
+// Representations of the frame pointer used by ISD::FRAMEADDR.
+def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
+def FP8 : GP8<FP, "**FRAME POINTER**">;
+
+// Representations of the base pointer used by setjmp.
+def BP : GPR<0 /* arbitrary */, "**BASE POINTER**">;
+def BP8 : GP8<BP, "**BASE POINTER**">;
+
+// Condition register bits
+def CR0LT : CRBIT< 0, "0">;
+def CR0GT : CRBIT< 1, "1">;
+def CR0EQ : CRBIT< 2, "2">;
+def CR0UN : CRBIT< 3, "3">;
+def CR1LT : CRBIT< 4, "4">;
+def CR1GT : CRBIT< 5, "5">;
+def CR1EQ : CRBIT< 6, "6">;
+def CR1UN : CRBIT< 7, "7">;
+def CR2LT : CRBIT< 8, "8">;
+def CR2GT : CRBIT< 9, "9">;
+def CR2EQ : CRBIT<10, "10">;
+def CR2UN : CRBIT<11, "11">;
+def CR3LT : CRBIT<12, "12">;
+def CR3GT : CRBIT<13, "13">;
+def CR3EQ : CRBIT<14, "14">;
+def CR3UN : CRBIT<15, "15">;
+def CR4LT : CRBIT<16, "16">;
+def CR4GT : CRBIT<17, "17">;
+def CR4EQ : CRBIT<18, "18">;
+def CR4UN : CRBIT<19, "19">;
+def CR5LT : CRBIT<20, "20">;
+def CR5GT : CRBIT<21, "21">;
+def CR5EQ : CRBIT<22, "22">;
+def CR5UN : CRBIT<23, "23">;
+def CR6LT : CRBIT<24, "24">;
+def CR6GT : CRBIT<25, "25">;
+def CR6EQ : CRBIT<26, "26">;
+def CR6UN : CRBIT<27, "27">;
+def CR7LT : CRBIT<28, "28">;
+def CR7GT : CRBIT<29, "29">;
+def CR7EQ : CRBIT<30, "30">;
+def CR7UN : CRBIT<31, "31">;
+
+// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
+def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68, 68]>;
+def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69, 69]>;
+def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70, 70]>;
+def CR3 : CR<3, "cr3", [CR3LT, CR3GT, CR3EQ, CR3UN]>, DwarfRegNum<[71, 71]>;
+def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72, 72]>;
+def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73, 73]>;
+def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
+def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
+}
+
+// The full condition-code register. This is not modeled fully, but defined
+// here primarily, for compatibility with gcc, to allow the inline asm "cc"
+// clobber specification to work.
+def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
+ let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
+}
+
+// Link register
+def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
+//let Aliases = [LR] in
+def LR8 : SPR<8, "lr">, DwarfRegNum<[65, -2]>;
+
+// Count register
+def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
+def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
+
+// VRsave register
+def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
+
+// Carry bit. In the architecture this is really bit 0 of the XER register
+// (which really is SPR register 1); this is the only bit interesting to a
+// compiler.
+def CARRY: SPR<1, "ca">;
+
+// FP rounding mode: bits 30 and 31 of the FP status and control register
+// This is not allocated as a normal register; it appears only in
+// Uses and Defs. The ABI says it needs to be preserved by a function,
+// but this is not achieved by saving and restoring it as with
+// most registers, it has to be done in code; to make this work all the
+// return and call instructions are described as Uses of RM, so instructions
+// that do nothing but change RM will not get deleted.
+// Also, in the architecture it is not really a SPR; 512 is arbitrary.
+def RM: SPR<512, "**ROUNDING MODE**">;
+
+/// Register classes
+// Allocate volatiles first
+// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, FP, BP)>;
+
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+ (sequence "X%u", 30, 14),
+ X31, X13, X0, X1, FP8, BP8)>;
+
+// For some instructions r0 is special (representing the value 0 instead of
+// the value in the r0 register), and we use these register subclasses to
+// prevent r0 from being allocated for use by those instructions.
+def GPRC_NOR0 : RegisterClass<"PPC", [i32], 32, (add (sub GPRC, R0), ZERO)>;
+def G8RC_NOX0 : RegisterClass<"PPC", [i64], 64, (add (sub G8RC, X0), ZERO8)>;
+
+// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
+// ABI the size of the Floating-point register save area is determined by the
+// allocated non-volatile register with the lowest register number, as FP
+// register N is spilled to offset 8 * (32 - N) below the back chain word of the
+// previous stack frame. By allocating non-volatiles in reverse order we make
+// sure that the Floating-point register save area is always as small as
+// possible because there aren't any unused spill slots.
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+ (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
+ (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+ V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+ V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
+
+// VSX register classes (the allocation order mirrors that of the corresponding
+// subregister classes).
+def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add (sequence "VSL%u", 0, 13),
+ (sequence "VSL%u", 31, 14))>;
+def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
+ VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
+ VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
+ VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
+ VSH22, VSH21, VSH20)>;
+def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
+ (add VSLRC, VSHRC)>;
+
+// Register classes for the 64-bit "scalar" VSX subregisters.
+def VFRC : RegisterClass<"PPC", [f64], 64,
+ (add VF2, VF3, VF4, VF5, VF0, VF1, VF6, VF7,
+ VF8, VF9, VF10, VF11, VF12, VF13, VF14,
+ VF15, VF16, VF17, VF18, VF19, VF31, VF30,
+ VF29, VF28, VF27, VF26, VF25, VF24, VF23,
+ VF22, VF21, VF20)>;
+def VSFRC : RegisterClass<"PPC", [f64], 64, (add F8RC, VFRC)>;
+
+def CRBITRC : RegisterClass<"PPC", [i1], 32,
+ (add CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR0LT, CR0GT, CR0EQ, CR0UN)> {
+ let Size = 32;
+}
+
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)>;
+
+// The CTR registers are not allocatable because they're used by the
+// decrement-and-branch instructions, and thus need to stay live across
+// multiple basic blocks.
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)> {
+ let isAllocatable = 0;
+}
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)> {
+ let isAllocatable = 0;
+}
+
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
+ let CopyCost = -1;
+}
+
+def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
+ let isAllocatable = 0;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
new file mode 100644
index 000000000000..0b392f99b6d7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRelocations.h
@@ -0,0 +1,56 @@
+//===-- PPCRelocations.h - PPC Code Relocations -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC 32-bit target-specific relocation types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCRELOCATIONS_H
+#define PPCRELOCATIONS_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+
+// Hack to rid us of a PPC pre-processor symbol which is erroneously
+// defined in a PowerPC header file (bug in Linux/PPC)
+#ifdef PPC
+#undef PPC
+#endif
+
+namespace llvm {
+ namespace PPC {
+ enum RelocationType {
+ // reloc_vanilla - A standard relocation, where the address of the
+ // relocated object completely overwrites the address of the relocation.
+ reloc_vanilla,
+
+ // reloc_pcrel_bx - PC relative relocation, for the b or bl instructions.
+ reloc_pcrel_bx,
+
+ // reloc_pcrel_bcx - PC relative relocation, for BLT,BLE,BEQ,BGE,BGT,BNE,
+ // and other bcx instructions.
+ reloc_pcrel_bcx,
+
+ // reloc_absolute_high - Absolute relocation, for the loadhi instruction
+ // (which is really addis). Add the high 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_high,
+
+ // reloc_absolute_low - Absolute relocation, for the la instruction (which
+ // is really an addi). Add the low 16-bits of the specified global
+ // address into the low 16-bits of the instruction.
+ reloc_absolute_low,
+
+ // reloc_absolute_low_ix - Absolute relocation for the 64-bit load/store
+ // instruction which have two implicit zero bits.
+ reloc_absolute_low_ix
+ };
+ }
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
new file mode 100644
index 000000000000..1221d4149996
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -0,0 +1,520 @@
+//===-- PPCSchedule.td - PowerPC Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for PowerPC
+//
+def IIC_IntSimple : InstrItinClass;
+def IIC_IntGeneral : InstrItinClass;
+def IIC_IntCompare : InstrItinClass;
+def IIC_IntDivD : InstrItinClass;
+def IIC_IntDivW : InstrItinClass;
+def IIC_IntMFFS : InstrItinClass;
+def IIC_IntMFVSCR : InstrItinClass;
+def IIC_IntMTFSB0 : InstrItinClass;
+def IIC_IntMTSRD : InstrItinClass;
+def IIC_IntMulHD : InstrItinClass;
+def IIC_IntMulHW : InstrItinClass;
+def IIC_IntMulHWU : InstrItinClass;
+def IIC_IntMulLI : InstrItinClass;
+def IIC_IntRFID : InstrItinClass;
+def IIC_IntRotateD : InstrItinClass;
+def IIC_IntRotateDI : InstrItinClass;
+def IIC_IntRotate : InstrItinClass;
+def IIC_IntShift : InstrItinClass;
+def IIC_IntTrapD : InstrItinClass;
+def IIC_IntTrapW : InstrItinClass;
+def IIC_BrB : InstrItinClass;
+def IIC_BrCR : InstrItinClass;
+def IIC_BrMCR : InstrItinClass;
+def IIC_BrMCRX : InstrItinClass;
+def IIC_LdStDCBA : InstrItinClass;
+def IIC_LdStDCBF : InstrItinClass;
+def IIC_LdStDCBI : InstrItinClass;
+def IIC_LdStLoad : InstrItinClass;
+def IIC_LdStLoadUpd : InstrItinClass;
+def IIC_LdStLoadUpdX : InstrItinClass;
+def IIC_LdStStore : InstrItinClass;
+def IIC_LdStStoreUpd : InstrItinClass;
+def IIC_LdStDSS : InstrItinClass;
+def IIC_LdStICBI : InstrItinClass;
+def IIC_LdStLD : InstrItinClass;
+def IIC_LdStLDU : InstrItinClass;
+def IIC_LdStLDUX : InstrItinClass;
+def IIC_LdStLDARX : InstrItinClass;
+def IIC_LdStLFD : InstrItinClass;
+def IIC_LdStLFDU : InstrItinClass;
+def IIC_LdStLFDUX : InstrItinClass;
+def IIC_LdStLHA : InstrItinClass;
+def IIC_LdStLHAU : InstrItinClass;
+def IIC_LdStLHAUX : InstrItinClass;
+def IIC_LdStLMW : InstrItinClass;
+def IIC_LdStLVecX : InstrItinClass;
+def IIC_LdStLWA : InstrItinClass;
+def IIC_LdStLWARX : InstrItinClass;
+def IIC_LdStSLBIA : InstrItinClass;
+def IIC_LdStSLBIE : InstrItinClass;
+def IIC_LdStSTD : InstrItinClass;
+def IIC_LdStSTDCX : InstrItinClass;
+def IIC_LdStSTDU : InstrItinClass;
+def IIC_LdStSTDUX : InstrItinClass;
+def IIC_LdStSTFD : InstrItinClass;
+def IIC_LdStSTFDU : InstrItinClass;
+def IIC_LdStSTVEBX : InstrItinClass;
+def IIC_LdStSTWCX : InstrItinClass;
+def IIC_LdStSync : InstrItinClass;
+def IIC_SprISYNC : InstrItinClass;
+def IIC_SprMFSR : InstrItinClass;
+def IIC_SprMTMSR : InstrItinClass;
+def IIC_SprMTSR : InstrItinClass;
+def IIC_SprTLBSYNC : InstrItinClass;
+def IIC_SprMFCR : InstrItinClass;
+def IIC_SprMFCRF : InstrItinClass;
+def IIC_SprMFMSR : InstrItinClass;
+def IIC_SprMFSPR : InstrItinClass;
+def IIC_SprMFTB : InstrItinClass;
+def IIC_SprMTSPR : InstrItinClass;
+def IIC_SprMTSRIN : InstrItinClass;
+def IIC_SprRFI : InstrItinClass;
+def IIC_SprSC : InstrItinClass;
+def IIC_FPGeneral : InstrItinClass;
+def IIC_FPAddSub : InstrItinClass;
+def IIC_FPCompare : InstrItinClass;
+def IIC_FPDivD : InstrItinClass;
+def IIC_FPDivS : InstrItinClass;
+def IIC_FPFused : InstrItinClass;
+def IIC_FPRes : InstrItinClass;
+def IIC_FPSqrtD : InstrItinClass;
+def IIC_FPSqrtS : InstrItinClass;
+def IIC_VecGeneral : InstrItinClass;
+def IIC_VecFP : InstrItinClass;
+def IIC_VecFPCompare : InstrItinClass;
+def IIC_VecComplex : InstrItinClass;
+def IIC_VecPerm : InstrItinClass;
+def IIC_VecFPRound : InstrItinClass;
+def IIC_VecVSL : InstrItinClass;
+def IIC_VecVSR : InstrItinClass;
+def IIC_SprMTMSRD : InstrItinClass;
+def IIC_SprSLIE : InstrItinClass;
+def IIC_SprSLBIE : InstrItinClass;
+def IIC_SprSLBMTE : InstrItinClass;
+def IIC_SprSLBMFEE : InstrItinClass;
+def IIC_SprSLBIA : InstrItinClass;
+def IIC_SprTLBIEL : InstrItinClass;
+def IIC_SprTLBIE : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+include "PPCScheduleG3.td"
+include "PPCSchedule440.td"
+include "PPCScheduleG4.td"
+include "PPCScheduleG4Plus.td"
+include "PPCScheduleG5.td"
+include "PPCScheduleP7.td"
+include "PPCScheduleA2.td"
+include "PPCScheduleE500mc.td"
+include "PPCScheduleE5500.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction to itinerary class map - When add new opcodes to the supported
+// set, refer to the following table to determine which itinerary class the
+// opcode belongs.
+//
+// opcode itinerary class
+// ====== ===============
+// add IIC_IntSimple
+// addc IIC_IntGeneral
+// adde IIC_IntGeneral
+// addi IIC_IntSimple
+// addic IIC_IntGeneral
+// addic. IIC_IntGeneral
+// addis IIC_IntSimple
+// addme IIC_IntGeneral
+// addze IIC_IntGeneral
+// and IIC_IntSimple
+// andc IIC_IntSimple
+// andi. IIC_IntGeneral
+// andis. IIC_IntGeneral
+// b IIC_BrB
+// bc IIC_BrB
+// bcctr IIC_BrB
+// bclr IIC_BrB
+// cmp IIC_IntCompare
+// cmpi IIC_IntCompare
+// cmpl IIC_IntCompare
+// cmpli IIC_IntCompare
+// cntlzd IIC_IntRotateD
+// cntlzw IIC_IntGeneral
+// crand IIC_BrCR
+// crandc IIC_BrCR
+// creqv IIC_BrCR
+// crnand IIC_BrCR
+// crnor IIC_BrCR
+// cror IIC_BrCR
+// crorc IIC_BrCR
+// crxor IIC_BrCR
+// dcba IIC_LdStDCBA
+// dcbf IIC_LdStDCBF
+// dcbi IIC_LdStDCBI
+// dcbst IIC_LdStDCBF
+// dcbt IIC_LdStLoad
+// dcbtst IIC_LdStLoad
+// dcbz IIC_LdStDCBF
+// divd IIC_IntDivD
+// divdu IIC_IntDivD
+// divw IIC_IntDivW
+// divwu IIC_IntDivW
+// dss IIC_LdStDSS
+// dst IIC_LdStDSS
+// dstst IIC_LdStDSS
+// eciwx IIC_LdStLoad
+// ecowx IIC_LdStLoad
+// eieio IIC_LdStLoad
+// eqv IIC_IntSimple
+// extsb IIC_IntSimple
+// extsh IIC_IntSimple
+// extsw IIC_IntSimple
+// fabs IIC_FPGeneral
+// fadd IIC_FPAddSub
+// fadds IIC_FPGeneral
+// fcfid IIC_FPGeneral
+// fcmpo IIC_FPCompare
+// fcmpu IIC_FPCompare
+// fctid IIC_FPGeneral
+// fctidz IIC_FPGeneral
+// fctiw IIC_FPGeneral
+// fctiwz IIC_FPGeneral
+// fdiv IIC_FPDivD
+// fdivs IIC_FPDivS
+// fmadd IIC_FPFused
+// fmadds IIC_FPGeneral
+// fmr IIC_FPGeneral
+// fmsub IIC_FPFused
+// fmsubs IIC_FPGeneral
+// fmul IIC_FPFused
+// fmuls IIC_FPGeneral
+// fnabs IIC_FPGeneral
+// fneg IIC_FPGeneral
+// fnmadd IIC_FPFused
+// fnmadds IIC_FPGeneral
+// fnmsub IIC_FPFused
+// fnmsubs IIC_FPGeneral
+// fres IIC_FPRes
+// frsp IIC_FPGeneral
+// frsqrte IIC_FPGeneral
+// fsel IIC_FPGeneral
+// fsqrt IIC_FPSqrtD
+// fsqrts IIC_FPSqrtS
+// fsub IIC_FPAddSub
+// fsubs IIC_FPGeneral
+// icbi IIC_LdStICBI
+// isync IIC_SprISYNC
+// lbz IIC_LdStLoad
+// lbzu IIC_LdStLoadUpd
+// lbzux IIC_LdStLoadUpdX
+// lbzx IIC_LdStLoad
+// ld IIC_LdStLD
+// ldarx IIC_LdStLDARX
+// ldu IIC_LdStLDU
+// ldux IIC_LdStLDUX
+// ldx IIC_LdStLD
+// lfd IIC_LdStLFD
+// lfdu IIC_LdStLFDU
+// lfdux IIC_LdStLFDUX
+// lfdx IIC_LdStLFD
+// lfs IIC_LdStLFD
+// lfsu IIC_LdStLFDU
+// lfsux IIC_LdStLFDUX
+// lfsx IIC_LdStLFD
+// lha IIC_LdStLHA
+// lhau IIC_LdStLHAU
+// lhaux IIC_LdStLHAUX
+// lhax IIC_LdStLHA
+// lhbrx IIC_LdStLoad
+// lhz IIC_LdStLoad
+// lhzu IIC_LdStLoadUpd
+// lhzux IIC_LdStLoadUpdX
+// lhzx IIC_LdStLoad
+// lmw IIC_LdStLMW
+// lswi IIC_LdStLMW
+// lswx IIC_LdStLMW
+// lvebx IIC_LdStLVecX
+// lvehx IIC_LdStLVecX
+// lvewx IIC_LdStLVecX
+// lvsl IIC_LdStLVecX
+// lvsr IIC_LdStLVecX
+// lvx IIC_LdStLVecX
+// lvxl IIC_LdStLVecX
+// lwa IIC_LdStLWA
+// lwarx IIC_LdStLWARX
+// lwaux IIC_LdStLHAUX
+// lwax IIC_LdStLHA
+// lwbrx IIC_LdStLoad
+// lwz IIC_LdStLoad
+// lwzu IIC_LdStLoadUpd
+// lwzux IIC_LdStLoadUpdX
+// lwzx IIC_LdStLoad
+// mcrf IIC_BrMCR
+// mcrfs IIC_FPGeneral
+// mcrxr IIC_BrMCRX
+// mfcr IIC_SprMFCR
+// mffs IIC_IntMFFS
+// mfmsr IIC_SprMFMSR
+// mfspr IIC_SprMFSPR
+// mfsr IIC_SprMFSR
+// mfsrin IIC_SprMFSR
+// mftb IIC_SprMFTB
+// mfvscr IIC_IntMFVSCR
+// mtcrf IIC_BrMCRX
+// mtfsb0 IIC_IntMTFSB0
+// mtfsb1 IIC_IntMTFSB0
+// mtfsf IIC_IntMTFSB0
+// mtfsfi IIC_IntMTFSB0
+// mtmsr IIC_SprMTMSR
+// mtmsrd IIC_LdStLD
+// mtspr IIC_SprMTSPR
+// mtsr IIC_SprMTSR
+// mtsrd IIC_IntMTSRD
+// mtsrdin IIC_IntMTSRD
+// mtsrin IIC_SprMTSRIN
+// mtvscr IIC_IntMFVSCR
+// mulhd IIC_IntMulHD
+// mulhdu IIC_IntMulHD
+// mulhw IIC_IntMulHW
+// mulhwu IIC_IntMulHWU
+// mulld IIC_IntMulHD
+// mulli IIC_IntMulLI
+// mullw IIC_IntMulHW
+// nand IIC_IntSimple
+// neg IIC_IntSimple
+// nor IIC_IntSimple
+// or IIC_IntSimple
+// orc IIC_IntSimple
+// ori IIC_IntSimple
+// oris IIC_IntSimple
+// rfi IIC_SprRFI
+// rfid IIC_IntRFID
+// rldcl IIC_IntRotateD
+// rldcr IIC_IntRotateD
+// rldic IIC_IntRotateDI
+// rldicl IIC_IntRotateDI
+// rldicr IIC_IntRotateDI
+// rldimi IIC_IntRotateDI
+// rlwimi IIC_IntRotate
+// rlwinm IIC_IntGeneral
+// rlwnm IIC_IntGeneral
+// sc IIC_SprSC
+// slbia IIC_LdStSLBIA
+// slbie IIC_LdStSLBIE
+// sld IIC_IntRotateD
+// slw IIC_IntGeneral
+// srad IIC_IntRotateD
+// sradi IIC_IntRotateDI
+// sraw IIC_IntShift
+// srawi IIC_IntShift
+// srd IIC_IntRotateD
+// srw IIC_IntGeneral
+// stb IIC_LdStStore
+// stbu IIC_LdStStoreUpd
+// stbux IIC_LdStStoreUpd
+// stbx IIC_LdStStore
+// std IIC_LdStSTD
+// stdcx. IIC_LdStSTDCX
+// stdu IIC_LdStSTDU
+// stdux IIC_LdStSTDUX
+// stdx IIC_LdStSTD
+// stfd IIC_LdStSTFD
+// stfdu IIC_LdStSTFDU
+// stfdux IIC_LdStSTFDU
+// stfdx IIC_LdStSTFD
+// stfiwx IIC_LdStSTFD
+// stfs IIC_LdStSTFD
+// stfsu IIC_LdStSTFDU
+// stfsux IIC_LdStSTFDU
+// stfsx IIC_LdStSTFD
+// sth IIC_LdStStore
+// sthbrx IIC_LdStStore
+// sthu IIC_LdStStoreUpd
+// sthux IIC_LdStStoreUpd
+// sthx IIC_LdStStore
+// stmw IIC_LdStLMW
+// stswi IIC_LdStLMW
+// stswx IIC_LdStLMW
+// stvebx IIC_LdStSTVEBX
+// stvehx IIC_LdStSTVEBX
+// stvewx IIC_LdStSTVEBX
+// stvx IIC_LdStSTVEBX
+// stvxl IIC_LdStSTVEBX
+// stw IIC_LdStStore
+// stwbrx IIC_LdStStore
+// stwcx. IIC_LdStSTWCX
+// stwu IIC_LdStStoreUpd
+// stwux IIC_LdStStoreUpd
+// stwx IIC_LdStStore
+// subf IIC_IntGeneral
+// subfc IIC_IntGeneral
+// subfe IIC_IntGeneral
+// subfic IIC_IntGeneral
+// subfme IIC_IntGeneral
+// subfze IIC_IntGeneral
+// sync IIC_LdStSync
+// td IIC_IntTrapD
+// tdi IIC_IntTrapD
+// tlbia IIC_LdStSLBIA
+// tlbie IIC_LdStDCBF
+// tlbsync IIC_SprTLBSYNC
+// tw IIC_IntTrapW
+// twi IIC_IntTrapW
+// vaddcuw IIC_VecGeneral
+// vaddfp IIC_VecFP
+// vaddsbs IIC_VecGeneral
+// vaddshs IIC_VecGeneral
+// vaddsws IIC_VecGeneral
+// vaddubm IIC_VecGeneral
+// vaddubs IIC_VecGeneral
+// vadduhm IIC_VecGeneral
+// vadduhs IIC_VecGeneral
+// vadduwm IIC_VecGeneral
+// vadduws IIC_VecGeneral
+// vand IIC_VecGeneral
+// vandc IIC_VecGeneral
+// vavgsb IIC_VecGeneral
+// vavgsh IIC_VecGeneral
+// vavgsw IIC_VecGeneral
+// vavgub IIC_VecGeneral
+// vavguh IIC_VecGeneral
+// vavguw IIC_VecGeneral
+// vcfsx IIC_VecFP
+// vcfux IIC_VecFP
+// vcmpbfp IIC_VecFPCompare
+// vcmpeqfp IIC_VecFPCompare
+// vcmpequb IIC_VecGeneral
+// vcmpequh IIC_VecGeneral
+// vcmpequw IIC_VecGeneral
+// vcmpgefp IIC_VecFPCompare
+// vcmpgtfp IIC_VecFPCompare
+// vcmpgtsb IIC_VecGeneral
+// vcmpgtsh IIC_VecGeneral
+// vcmpgtsw IIC_VecGeneral
+// vcmpgtub IIC_VecGeneral
+// vcmpgtuh IIC_VecGeneral
+// vcmpgtuw IIC_VecGeneral
+// vctsxs IIC_VecFP
+// vctuxs IIC_VecFP
+// vexptefp IIC_VecFP
+// vlogefp IIC_VecFP
+// vmaddfp IIC_VecFP
+// vmaxfp IIC_VecFPCompare
+// vmaxsb IIC_VecGeneral
+// vmaxsh IIC_VecGeneral
+// vmaxsw IIC_VecGeneral
+// vmaxub IIC_VecGeneral
+// vmaxuh IIC_VecGeneral
+// vmaxuw IIC_VecGeneral
+// vmhaddshs IIC_VecComplex
+// vmhraddshs IIC_VecComplex
+// vminfp IIC_VecFPCompare
+// vminsb IIC_VecGeneral
+// vminsh IIC_VecGeneral
+// vminsw IIC_VecGeneral
+// vminub IIC_VecGeneral
+// vminuh IIC_VecGeneral
+// vminuw IIC_VecGeneral
+// vmladduhm IIC_VecComplex
+// vmrghb IIC_VecPerm
+// vmrghh IIC_VecPerm
+// vmrghw IIC_VecPerm
+// vmrglb IIC_VecPerm
+// vmrglh IIC_VecPerm
+// vmrglw IIC_VecPerm
+// vmsubfp IIC_VecFP
+// vmsummbm IIC_VecComplex
+// vmsumshm IIC_VecComplex
+// vmsumshs IIC_VecComplex
+// vmsumubm IIC_VecComplex
+// vmsumuhm IIC_VecComplex
+// vmsumuhs IIC_VecComplex
+// vmulesb IIC_VecComplex
+// vmulesh IIC_VecComplex
+// vmuleub IIC_VecComplex
+// vmuleuh IIC_VecComplex
+// vmulosb IIC_VecComplex
+// vmulosh IIC_VecComplex
+// vmuloub IIC_VecComplex
+// vmulouh IIC_VecComplex
+// vnor IIC_VecGeneral
+// vor IIC_VecGeneral
+// vperm IIC_VecPerm
+// vpkpx IIC_VecPerm
+// vpkshss IIC_VecPerm
+// vpkshus IIC_VecPerm
+// vpkswss IIC_VecPerm
+// vpkswus IIC_VecPerm
+// vpkuhum IIC_VecPerm
+// vpkuhus IIC_VecPerm
+// vpkuwum IIC_VecPerm
+// vpkuwus IIC_VecPerm
+// vrefp IIC_VecFPRound
+// vrfim IIC_VecFPRound
+// vrfin IIC_VecFPRound
+// vrfip IIC_VecFPRound
+// vrfiz IIC_VecFPRound
+// vrlb IIC_VecGeneral
+// vrlh IIC_VecGeneral
+// vrlw IIC_VecGeneral
+// vrsqrtefp IIC_VecFP
+// vsel IIC_VecGeneral
+// vsl IIC_VecVSL
+// vslb IIC_VecGeneral
+// vsldoi IIC_VecPerm
+// vslh IIC_VecGeneral
+// vslo IIC_VecPerm
+// vslw IIC_VecGeneral
+// vspltb IIC_VecPerm
+// vsplth IIC_VecPerm
+// vspltisb IIC_VecPerm
+// vspltish IIC_VecPerm
+// vspltisw IIC_VecPerm
+// vspltw IIC_VecPerm
+// vsr IIC_VecVSR
+// vsrab IIC_VecGeneral
+// vsrah IIC_VecGeneral
+// vsraw IIC_VecGeneral
+// vsrb IIC_VecGeneral
+// vsrh IIC_VecGeneral
+// vsro IIC_VecPerm
+// vsrw IIC_VecGeneral
+// vsubcuw IIC_VecGeneral
+// vsubfp IIC_VecFP
+// vsubsbs IIC_VecGeneral
+// vsubshs IIC_VecGeneral
+// vsubsws IIC_VecGeneral
+// vsububm IIC_VecGeneral
+// vsububs IIC_VecGeneral
+// vsubuhm IIC_VecGeneral
+// vsubuhs IIC_VecGeneral
+// vsubuwm IIC_VecGeneral
+// vsubuws IIC_VecGeneral
+// vsum2sws IIC_VecComplex
+// vsum4sbs IIC_VecComplex
+// vsum4shs IIC_VecComplex
+// vsum4ubs IIC_VecComplex
+// vsumsws IIC_VecComplex
+// vupkhpx IIC_VecPerm
+// vupkhsb IIC_VecPerm
+// vupkhsh IIC_VecPerm
+// vupklpx IIC_VecPerm
+// vupklsb IIC_VecPerm
+// vupklsh IIC_VecPerm
+// vxor IIC_VecGeneral
+// xor IIC_IntSimple
+// xori IIC_IntSimple
+// xoris IIC_IntSimple
+//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
new file mode 100644
index 000000000000..218fed248a31
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule440.td
@@ -0,0 +1,599 @@
+//===-- PPCSchedule440.td - PPC 440 Scheduling Definitions -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// PowerPC 440x6 Embedded Processor Core User's Manual.
+// IBM (as updated in) 2010.
+
+// The basic PPC 440 does not include a floating-point unit; the pipeline
+// timings here are constructed to match the FP2 unit shipped with the
+// PPC-440- and PPC-450-based Blue Gene (L and P) supercomputers.
+// References:
+// S. Chatterjee, et al. Design and exploitation of a high-performance
+// SIMD floating-point unit for Blue Gene/L.
+// IBM J. Res. & Dev. 49 (2/3) March/May 2005.
+// also:
+// Carlos Sosa and Brant Knudson. IBM System Blue Gene Solution:
+// Blue Gene/P Application Development.
+// IBM (as updated in) 2009.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC 440/450 chip sets
+//
+def P440_DISS1 : FuncUnit; // Issue unit 1
+def P440_DISS2 : FuncUnit; // Issue unit 2
+def P440_LRACC : FuncUnit; // Register access and dispatch for
+ // the simple integer (J-pipe) and
+ // load/store (L-pipe) pipelines
+def P440_IRACC : FuncUnit; // Register access and dispatch for
+ // the complex integer (I-pipe) pipeline
+def P440_FRACC : FuncUnit; // Register access and dispatch for
+ // the floating-point execution (F-pipe) pipeline
+def P440_IEXE1 : FuncUnit; // Execution stage 1 for the I pipeline
+def P440_IEXE2 : FuncUnit; // Execution stage 2 for the I pipeline
+def P440_IWB : FuncUnit; // Write-back unit for the I pipeline
+def P440_JEXE1 : FuncUnit; // Execution stage 1 for the J pipeline
+def P440_JEXE2 : FuncUnit; // Execution stage 2 for the J pipeline
+def P440_JWB : FuncUnit; // Write-back unit for the J pipeline
+def P440_AGEN : FuncUnit; // Address generation for the L pipeline
+def P440_CRD : FuncUnit; // D-cache access for the L pipeline
+def P440_LWB : FuncUnit; // Write-back unit for the L pipeline
+def P440_FEXE1 : FuncUnit; // Execution stage 1 for the F pipeline
+def P440_FEXE2 : FuncUnit; // Execution stage 2 for the F pipeline
+def P440_FEXE3 : FuncUnit; // Execution stage 3 for the F pipeline
+def P440_FEXE4 : FuncUnit; // Execution stage 4 for the F pipeline
+def P440_FEXE5 : FuncUnit; // Execution stage 5 for the F pipeline
+def P440_FEXE6 : FuncUnit; // Execution stage 6 for the F pipeline
+def P440_FWB : FuncUnit; // Write-back unit for the F pipeline
+
+def P440_LWARX_Hold : FuncUnit; // This is a pseudo-unit which is used
+ // to make sure that no lwarx/stwcx.
+ // instructions are issued while another
+ // lwarx/stwcx. is in the L pipe.
+
+def P440_GPR_Bypass : Bypass; // The bypass for general-purpose regs.
+def P440_FPR_Bypass : Bypass; // The bypass for floating-point regs.
+
+// Notes:
+// Instructions are held in the FRACC, LRACC and IRACC pipeline
+// stages until their source operands become ready. Exceptions:
+// - Store instructions will hold in the AGEN stage
+// - The integer multiply-accumulate instruction will hold in
+// the IEXE1 stage
+//
+// For most I-pipe operations, the result is available at the end of
+// the IEXE1 stage. Operations such as multiply and divide must
+// continue to execute in IEXE2 and IWB. Divide resides in IWB for
+// 33 cycles (multiply also calculates its result in IWB). For all
+// J-pipe instructions, the result is available
+// at the end of the JEXE1 stage. Loads have a 3-cycle latency
+// (data is not available until after the LWB stage).
+//
+// The L1 cache hit latency is four cycles for floating point loads
+// and three cycles for integer loads.
+//
+// The stwcx. instruction requires both the LRACC and the IRACC
+// dispatch stages. It must be issued from DISS0.
+//
+// All lwarx/stwcx. instructions hold in LRACC if another
+// uncommitted lwarx/stwcx. is in AGEN, CRD, or LWB.
+//
+// msync (a.k.a. sync) and mbar will hold in LWB until all load/store
+// resources are empty. AGEN and CRD are held empty until the msync/mbar
+// commits.
+//
+// Most floating-point instructions, computational and move,
+// have a 5-cycle latency. Divide takes longer (30 cycles). Instructions that
+// update the CR take 2 cycles. Stores take 3 cycles and, as mentioned above,
+// loads take 4 cycles (for L1 hit).
+
+//
+// This file defines the itinerary class data for the PPC 440 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPC440Itineraries : ProcessorItineraries<
+ [P440_DISS1, P440_DISS2, P440_FRACC, P440_IRACC, P440_IEXE1, P440_IEXE2,
+ P440_IWB, P440_LRACC, P440_JEXE1, P440_JEXE2, P440_JWB, P440_AGEN, P440_CRD,
+ P440_LWB, P440_FEXE1, P440_FEXE2, P440_FEXE3, P440_FEXE4, P440_FEXE5,
+ P440_FEXE6, P440_FWB, P440_LWARX_Hold],
+ [P440_GPR_Bypass, P440_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<33, [P440_IWB]>],
+ [36, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC, P440_LRACC]>,
+ InstrStage<1, [P440_IEXE1, P440_JEXE1]>,
+ InstrStage<1, [P440_IEXE2, P440_JEXE2]>,
+ InstrStage<1, [P440_IWB, P440_JWB]>],
+ [2, 0, 0],
+ [P440_GPR_Bypass,
+ P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0, 0],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [5, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [5, 2, 1, 1],
+ [NoBypass, P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<2, [P440_LWB]>],
+ [2, 1, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [P440_DISS1]>,
+ InstrStage<1, [P440_IRACC], 0>,
+ InstrStage<4, [P440_LWARX_Hold], 0>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<1, [P440_AGEN]>,
+ InstrStage<1, [P440_CRD]>,
+ InstrStage<1, [P440_LWB]>],
+ [4, 1, 1],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_LRACC]>,
+ InstrStage<3, [P440_AGEN], 1>,
+ InstrStage<2, [P440_CRD], 1>,
+ InstrStage<1, [P440_LWB]>]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC], 0>,
+ InstrStage<1, [P440_LRACC], 0>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_FEXE1], 0>,
+ InstrStage<1, [P440_AGEN], 0>,
+ InstrStage<1, [P440_JEXE1], 0>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_FEXE2], 0>,
+ InstrStage<1, [P440_CRD], 0>,
+ InstrStage<1, [P440_JEXE2], 0>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<6, [P440_FEXE3], 0>,
+ InstrStage<6, [P440_LWB], 0>,
+ InstrStage<6, [P440_JWB], 0>,
+ InstrStage<6, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [2, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [5, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [3, 0],
+ [P440_GPR_Bypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<3, [P440_IWB]>],
+ [6, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_IRACC]>,
+ InstrStage<1, [P440_IEXE1]>,
+ InstrStage<1, [P440_IEXE2]>,
+ InstrStage<1, [P440_IWB]>],
+ [4, 0],
+ [NoBypass, P440_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<25, [P440_FWB]>],
+ [31, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<13, [P440_FWB]>],
+ [19, 0, 0],
+ [NoBypass, P440_FPR_Bypass, P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0, 0, 0],
+ [P440_FPR_Bypass,
+ P440_FPR_Bypass, P440_FPR_Bypass,
+ P440_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [P440_DISS1, P440_DISS2]>,
+ InstrStage<1, [P440_FRACC]>,
+ InstrStage<1, [P440_FEXE1]>,
+ InstrStage<1, [P440_FEXE2]>,
+ InstrStage<1, [P440_FEXE3]>,
+ InstrStage<1, [P440_FEXE4]>,
+ InstrStage<1, [P440_FEXE5]>,
+ InstrStage<1, [P440_FEXE6]>,
+ InstrStage<1, [P440_FWB]>],
+ [6, 0],
+ [P440_FPR_Bypass, P440_FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// PPC440 machine model for scheduling and other instruction cost heuristics.
+
+def PPC440Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 instructions are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPC440Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
new file mode 100644
index 000000000000..14476963bad0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -0,0 +1,169 @@
+//===- PPCScheduleA2.td - PPC A2 Scheduling Definitions --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// A2 Processor User's Manual.
+// IBM (as updated in) 2010.
+
+//===----------------------------------------------------------------------===//
+// Functional units on the PowerPC A2 chip sets
+//
+def A2_XU : FuncUnit; // A2_XU pipeline
+def A2_FU : FuncUnit; // FI pipeline
+
+//
+// This file defines the itinerary class data for the PPC A2 processor.
+//
+//===----------------------------------------------------------------------===//
+
+
+def PPCA2Itineraries : ProcessorItineraries<
+ [A2_XU, A2_FU], [], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [A2_XU]>],
+ [39, 0, 0]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [A2_XU]>],
+ [71, 0, 0]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_IntTrapD, [InstrStage<1, [A2_XU]>],
+ [2, 0]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [A2_XU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [A2_XU]>],
+ [1, 0, 0]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [A2_XU]>],
+ [16, 0, 0]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [A2_XU]>],
+ [7, 0, 0]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [A2_XU]>],
+ [7, 9, 0, 0]>,
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [A2_XU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [A2_XU]>],
+ [6, 8, 0, 0]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [A2_XU]>],
+ [0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [A2_XU]>],
+ [2, 0, 0, 0]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [A2_XU]>],
+ [82, 0, 0]>, // L2 latency
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [A2_XU]>],
+ [6]>,
+ InstrItinData<IIC_SprISYNC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [A2_XU]>],
+ [16, 0]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [A2_XU]>],
+ [1, 0]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [A2_XU]>],
+ [4, 0]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [A2_XU]>],
+ [6, 0]>,
+ InstrItinData<IIC_SprRFI, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_SprSC, [InstrStage<1, [A2_XU]>],
+ [16]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [A2_FU]>],
+ [5, 0, 0]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [A2_FU]>],
+ [72, 0, 0]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [A2_FU]>],
+ [59, 0, 0]>,
+ InstrItinData<IIC_FPSqrtD, [InstrStage<1, [A2_FU]>],
+ [69, 0, 0]>,
+ InstrItinData<IIC_FPSqrtS, [InstrStage<1, [A2_FU]>],
+ [65, 0, 0]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [A2_FU]>],
+ [6, 0, 0, 0]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [A2_FU]>],
+ [6, 0]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// A2 machine model for scheduling and other instruction cost heuristics.
+
+def PPCA2Model : SchedMachineModel {
+ let IssueWidth = 1; // 1 instruction is dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 13;
+
+ let Itineraries = PPCA2Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
new file mode 100644
index 000000000000..dab89e3db353
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -0,0 +1,314 @@
+//===-- PPCScheduleE500mc.td - e500mc Scheduling Defs ------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e500mc 32-bit
+// Power processor.
+//
+// All information is derived from the "e500mc Core Reference Manual",
+// Freescale Document Number E500MCRM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e500mc core:
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def E500_DIS0 : FuncUnit; // Dispatch stage - insn 1
+def E500_DIS1 : FuncUnit; // Dispatch stage - insn 2
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// Some instructions can only execute in SFX0 but not SFX1.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is executed.
+def E500_SFX0 : FuncUnit; // Simple unit 0
+def E500_SFX1 : FuncUnit; // Simple unit 1
+def E500_BU : FuncUnit; // Branch unit
+def E500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E500_CFX_0 : FuncUnit; // CFX pipeline
+def E500_LSU_0 : FuncUnit; // LSU pipeline
+def E500_FPU_0 : FuncUnit; // FPU pipeline
+
+def E500_GPR_Bypass : Bypass;
+def E500_FPR_Bypass : Bypass;
+def E500_CR_Bypass : Bypass;
+
+def PPCE500mcItineraries : ProcessorItineraries<
+ [E500_DIS0, E500_DIS1, E500_SFX0, E500_SFX1, E500_BU, E500_CFX_DivBypass,
+ E500_CFX_0, E500_LSU_0, E500_FPU_0],
+ [E500_CR_Bypass, E500_GPR_Bypass, E500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [5, 1, 1], // Latency = 1 or 2
+ [E500_CR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0], 0>,
+ InstrStage<14, [E500_CFX_DivBypass]>],
+ [17, 1, 1], // Latency=4..35, Repeat= 4..35
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11], // Latency = 8
+ [E500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<8, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_CFX_0]>],
+ [7, 1, 1], // Latency = 4, Repeat rate = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0]>],
+ [5, 1], // Latency = 2, Repeat rate = 2
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass,
+ E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_BU]>],
+ [4, 1], // Latency = 1
+ [E500_CR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1, 1], // Latency = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3, Repeat rate = 1
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1, 1], // Latency = 4
+ [E500_FPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [7, 1], // Latency = r+3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<3, [E500_LSU_0]>],
+ [6, 1, 1], // Latency = 3, Repeat rate = 3
+ [E500_GPR_Bypass,
+ E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>],
+ [6, 1], // Latency = 3
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0]>]>,
+ InstrItinData<IIC_SprMFSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1],
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_SFX0, E500_SFX1]>],
+ [5, 1], // Latency = 2, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [5, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<5, [E500_SFX0]>],
+ [8, 1],
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [E500_GPR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_GPR_Bypass, E500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_SFX0]>],
+ [7, 1], // Latency = 4, Repeat rate = 4
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0, E500_SFX1]>],
+ [4, 1], // Latency = 1, Repeat rate = 1
+ [E500_CR_Bypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSRIN, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<1, [E500_SFX0]>],
+ [4, 1],
+ [NoBypass, E500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<2, [E500_FPU_0]>],
+ [11, 1, 1], // Latency = 8, Repeat rate = 2
+ [E500_CR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<68, [E500_FPU_0]>],
+ [71, 1, 1], // Latency = 68, Repeat rate = 68
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<4, [E500_FPU_0]>],
+ [13, 1, 1, 1], // Latency = 10, Repeat rate = 4
+ [E500_FPR_Bypass,
+ E500_FPR_Bypass, E500_FPR_Bypass,
+ E500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E500_DIS0, E500_DIS1], 0>,
+ InstrStage<38, [E500_FPU_0]>],
+ [41, 1], // Latency = 38, Repeat rate = 38
+ [E500_FPR_Bypass, E500_FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e500mc machine model for scheduling and other instruction cost heuristics.
+
+def PPCE500mcModel : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 5; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE500mcItineraries;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
new file mode 100644
index 000000000000..de097d9d8cf5
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -0,0 +1,374 @@
+//===-- PPCScheduleE500mc.td - e5500 Scheduling Defs -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the Freescale e5500 64-bit
+// Power processor.
+//
+// All information is derived from the "e5500 Core Reference Manual",
+// Freescale Document Number e5500RM, Rev. 1, 03/2012.
+//
+//===----------------------------------------------------------------------===//
+// Relevant functional units in the Freescale e5500 core
+// (These are the same as for the e500mc)
+//
+// * Decode & Dispatch
+// Can dispatch up to 2 instructions per clock cycle to either the GPR Issue
+// queues (GIQx), FP Issue Queue (FIQ), or Branch issue queue (BIQ).
+def E5500_DIS0 : FuncUnit;
+def E5500_DIS1 : FuncUnit;
+
+// * Execute
+// 6 pipelined execution units: SFX0, SFX1, BU, FPU, LSU, CFX.
+// The CFX has a bypass path, allowing non-divide instructions to execute
+// while a divide instruction is being executed.
+def E5500_SFX0 : FuncUnit; // Simple unit 0
+def E5500_SFX1 : FuncUnit; // Simple unit 1
+def E5500_BU : FuncUnit; // Branch unit
+def E5500_CFX_DivBypass
+ : FuncUnit; // CFX divide bypass path
+def E5500_CFX_0 : FuncUnit; // CFX pipeline stage 0
+
+def E5500_CFX_1 : FuncUnit; // CFX pipeline stage 1
+
+def E5500_LSU_0 : FuncUnit; // LSU pipeline
+def E5500_FPU_0 : FuncUnit; // FPU pipeline
+
+def E5500_GPR_Bypass : Bypass;
+def E5500_FPR_Bypass : Bypass;
+def E5500_CR_Bypass : Bypass;
+
+def PPCE5500Itineraries : ProcessorItineraries<
+ [E5500_DIS0, E5500_DIS1, E5500_SFX0, E5500_SFX1, E5500_BU,
+ E5500_CFX_DivBypass, E5500_CFX_0, E5500_CFX_1,
+ E5500_LSU_0, E5500_FPU_0],
+ [E5500_CR_Bypass, E5500_GPR_Bypass, E5500_FPR_Bypass], [
+ InstrItinData<IIC_IntSimple, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 1 or 2
+ [E5500_CR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<26, [E5500_CFX_DivBypass]>],
+ [30, 2, 2], // Latency= 4..26, Repeat rate= 4..26
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntDivW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<16, [E5500_CFX_DivBypass]>],
+ [20, 2, 2], // Latency= 4..16, Repeat rate= 4..16
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMFFS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass]>,
+ InstrItinData<IIC_IntMTFSB0, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<7, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 7
+ [NoBypass, NoBypass, NoBypass]>,
+ InstrItinData<IIC_IntMulHD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [9, 2, 2], // Latency = 4..7, Repeat rate = 2..4
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulHWU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<1, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntMulLI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0], 0>,
+ InstrStage<2, [E5500_CFX_1]>],
+ [8, 2, 2], // Latency = 4 or 5, Repeat = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotate, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntRotateDI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5, 2, 2], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntShift, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0, E5500_SFX1]>],
+ [6, 2, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_IntTrapW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_SFX0]>],
+ [6, 2], // Latency = 2, Repeat rate = 2
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_BrCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass,
+ E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_BU]>],
+ [5, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_BrMCRX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5, 2, 2], // Latency = 1
+ [E5500_CR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStDCBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoad, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLoadUpd, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLoadUpdX,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStStore, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStStoreUpd,[InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStICBI, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLFDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [8, 2, 2], // Latency = 4, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHA, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLHAU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLHAUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [E5500_GPR_Bypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStLMW, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_LSU_0]>],
+ [8, 2], // Latency = r+3, Repeat rate = r+3
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<3, [E5500_LSU_0]>],
+ [7, 2, 2], // Latency = 3, Repeat rate = 3
+ [E5500_GPR_Bypass,
+ E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSTDU, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTDUX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass],
+ 2>, // 2 micro-ops
+ InstrItinData<IIC_LdStSTWCX, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>],
+ [7, 2], // Latency = 3, Repeat rate = 1
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_LdStSync, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0]>]>,
+ InstrItinData<IIC_SprMTMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_CFX_0]>],
+ [6, 2], // Latency = 2, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprTLBSYNC, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_LSU_0], 0>]>,
+ InstrItinData<IIC_SprMFCR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFCRF, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<5, [E5500_CFX_0]>],
+ [9, 2], // Latency = 5, Repeat rate = 5
+ [E5500_GPR_Bypass, E5500_CR_Bypass]>,
+ InstrItinData<IIC_SprMFMSR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_SFX0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [E5500_GPR_Bypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_CFX_0]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMFTB, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<4, [E5500_CFX_0]>],
+ [8, 2], // Latency = 4, Repeat rate = 4
+ [NoBypass, E5500_GPR_Bypass]>,
+ InstrItinData<IIC_SprMTSPR, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_SFX0, E5500_SFX1]>],
+ [5], // Latency = 1, Repeat rate = 1
+ [E5500_GPR_Bypass]>,
+ InstrItinData<IIC_FPGeneral, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPAddSub, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPCompare, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_CR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivD, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<31, [E5500_FPU_0]>],
+ [39, 2, 2], // Latency = 35, Repeat rate = 31
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPDivS, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<16, [E5500_FPU_0]>],
+ [24, 2, 2], // Latency = 20, Repeat rate = 16
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPFused, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<1, [E5500_FPU_0]>],
+ [11, 2, 2, 2], // Latency = 7, Repeat rate = 1
+ [E5500_FPR_Bypass,
+ E5500_FPR_Bypass, E5500_FPR_Bypass,
+ E5500_FPR_Bypass]>,
+ InstrItinData<IIC_FPRes, [InstrStage<1, [E5500_DIS0, E5500_DIS1], 0>,
+ InstrStage<2, [E5500_FPU_0]>],
+ [12, 2], // Latency = 8, Repeat rate = 2
+ [E5500_FPR_Bypass, E5500_FPR_Bypass]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// e5500 machine model for scheduling and other instruction cost heuristics.
+
+def PPCE5500Model : SchedMachineModel {
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = -1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 6; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+
+ let Itineraries = PPCE5500Itineraries;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
new file mode 100644
index 000000000000..21efd8f8f6c9
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG3.td
@@ -0,0 +1,80 @@
+//===-- PPCScheduleG3.td - PPC G3 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G3 (750) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G3_BPU : FuncUnit; // Branch unit
+def G3_SLU : FuncUnit; // Store/load unit
+def G3_SRU : FuncUnit; // special register unit
+def G3_IU1 : FuncUnit; // integer unit 1 (simple)
+def G3_IU2 : FuncUnit; // integer unit 2 (complex)
+def G3_FPU1 : FuncUnit; // floating point unit 1
+
+def G3Itineraries : ProcessorItineraries<
+ [G3_IU1, G3_IU2, G3_FPU1, G3_BPU, G3_SRU, G3_SLU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G3_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G3_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G3_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G3_IU1, G3_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G3_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_LdStDCBA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<8, [G3_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<3, [G3_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<3, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G3_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<2, [G3_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G3_FPU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
new file mode 100644
index 000000000000..340773ef7876
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4.td
@@ -0,0 +1,96 @@
+//===-- PPCScheduleG4.td - PPC G4 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4 (7400) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4_BPU : FuncUnit; // Branch unit
+def G4_SLU : FuncUnit; // Store/load unit
+def G4_SRU : FuncUnit; // special register unit
+def G4_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4_FPU1 : FuncUnit; // floating point unit 1
+def G4_VPU : FuncUnit; // vector permutation unit
+def G4_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4_VFPU : FuncUnit; // vector floating point unit
+
+def G4Itineraries : ProcessorItineraries<
+ [G4_IU1, G4_IU2, G4_SLU, G4_SRU, G4_BPU, G4_FPU1,
+ G4_VIU1, G4_VIU2, G4_VPU, G4_VFPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<19, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<3, [G4_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<6, [G4_IU1]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4_IU1]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4_IU1, G4_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<34, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<2, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<5, [G4_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<8, [G4_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<8, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<1, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<2, [G4_SRU]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<31, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<17, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [G4_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<10, [G4_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<3, [G4_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [G4_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [G4_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [G4_VIU1]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
new file mode 100644
index 000000000000..1d9f13fcb850
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG4Plus.td
@@ -0,0 +1,112 @@
+//===-- PPCScheduleG4Plus.td - PPC G4+ Scheduling Defs. ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G4+ (7450) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G4P_BPU : FuncUnit; // Branch unit
+def G4P_SLU : FuncUnit; // Store/load unit
+def G4P_SRU : FuncUnit; // special register unit
+def G4P_IU1 : FuncUnit; // integer unit 1 (simple)
+def G4P_IU2 : FuncUnit; // integer unit 2 (complex)
+def G4P_IU3 : FuncUnit; // integer unit 3 (simple)
+def G4P_IU4 : FuncUnit; // integer unit 4 (simple)
+def G4P_FPU1 : FuncUnit; // floating point unit 1
+def G4P_VPU : FuncUnit; // vector permutation unit
+def G4P_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G4P_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G4P_VFPU : FuncUnit; // vector floating point unit
+
+def G4PlusItineraries : ProcessorItineraries<
+ [G4P_IU1, G4P_IU2, G4P_IU3, G4P_IU4, G4P_BPU, G4P_SLU, G4P_FPU1,
+ G4P_VFPU, G4P_VIU1, G4P_VIU2, G4P_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<23, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<2, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G4P_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDCBI , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<4, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<37, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprMFSR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G4P_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<4, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<5, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprMTSRIN , [InstrStage<2, [G4P_IU2]>]>,
+ InstrItinData<IIC_SprRFI , [InstrStage<1, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<0, [G4P_IU1, G4P_IU2,
+ G4P_IU3, G4P_IU4]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<35, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<21, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<5, [G4P_FPU1]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<14, [G4P_FPU1]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<4, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G4P_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<4, [G4P_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<4, [G4P_VIU1]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G4P_VPU]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<2, [G4P_VPU]>]>
+]>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
new file mode 100644
index 000000000000..a3b73ab4454f
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleG5.td
@@ -0,0 +1,129 @@
+//===-- PPCScheduleG5.td - PPC G5 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the G5 (970) processor.
+//
+//===----------------------------------------------------------------------===//
+
+def G5_BPU : FuncUnit; // Branch unit
+def G5_SLU : FuncUnit; // Store/load unit
+def G5_SRU : FuncUnit; // special register unit
+def G5_IU1 : FuncUnit; // integer unit 1 (simple)
+def G5_IU2 : FuncUnit; // integer unit 2 (complex)
+def G5_FPU1 : FuncUnit; // floating point unit 1
+def G5_FPU2 : FuncUnit; // floating point unit 2
+def G5_VPU : FuncUnit; // vector permutation unit
+def G5_VIU1 : FuncUnit; // vector integer unit 1 (simple)
+def G5_VIU2 : FuncUnit; // vector integer unit 2 (complex)
+def G5_VFPU : FuncUnit; // vector floating point unit
+
+def G5Itineraries : ProcessorItineraries<
+ [G5_IU1, G5_IU2, G5_SLU, G5_BPU, G5_FPU1, G5_FPU2,
+ G5_VFPU, G5_VIU1, G5_VIU2, G5_VPU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<3, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<68, [G5_IU1]>]>,
+ InstrItinData<IIC_IntDivW , [InstrStage<36, [G5_IU1]>]>,
+ InstrItinData<IIC_IntMFFS , [InstrStage<6, [G5_IU2]>]>,
+ InstrItinData<IIC_IntMFVSCR , [InstrStage<1, [G5_VFPU]>]>,
+ InstrItinData<IIC_IntMTFSB0 , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_IntMulHD , [InstrStage<7, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<5, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRFID , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotateDI , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<4, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntShift , [InstrStage<2, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [G5_IU1, G5_IU2]>]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [G5_BPU]>]>,
+ InstrItinData<IIC_BrCR , [InstrStage<4, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<2, [G5_BPU]>]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<3, [G5_BPU]>]>,
+ InstrItinData<IIC_LdStDCBF , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStStoreUpd, [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStDSS , [InstrStage<10, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStICBI , [InstrStage<40, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<4, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLDARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<64, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStLWARX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSLBIA , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_LdStSLBIE , [InstrStage<2, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<5, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<11, [G5_SLU]>]>,
+ InstrItinData<IIC_LdStSync , [InstrStage<35, [G5_SLU]>]>,
+ InstrItinData<IIC_SprISYNC , [InstrStage<40, [G5_SLU]>]>, // needs work
+ InstrItinData<IIC_SprMFSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTMSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMTSR , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprTLBSYNC , [InstrStage<3, [G5_SLU]>]>,
+ InstrItinData<IIC_SprMFCR , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<2, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFMSR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFSPR , [InstrStage<3, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMFTB , [InstrStage<10, [G5_IU2]>]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<8, [G5_IU2]>]>,
+ InstrItinData<IIC_SprSC , [InstrStage<1, [G5_IU2]>]>,
+ InstrItinData<IIC_FPGeneral , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPAddSub , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<8, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<33, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPFused , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPRes , [InstrStage<6, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<40, [G5_FPU1, G5_FPU2]>]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecFP , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<2, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<5, [G5_VIU2]>]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<3, [G5_VPU]>]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<8, [G5_VFPU]>]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<2, [G5_VIU1]>]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<3, [G5_VPU]>]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// G5 machine model for scheduling and other instruction cost heuristics.
+
+def G5Model : SchedMachineModel {
+ let IssueWidth = 4; // 4 (non-branch) instructions are dispatched per cycle.
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = G5Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
new file mode 100644
index 000000000000..d3e426975ec0
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP7.td
@@ -0,0 +1,385 @@
+//===-- PPCScheduleP7.td - PPC P7 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER7 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Primary reference:
+// IBM POWER7 multicore server processor
+// B. Sinharoy, et al.
+// IBM J. Res. & Dev. (55) 3. May/June 2011.
+
+// Scheduling for the P7 involves tracking two types of resources:
+// 1. The dispatch bundle slots
+// 2. The functional unit resources
+
+// Dispatch units:
+def P7_DU1 : FuncUnit;
+def P7_DU2 : FuncUnit;
+def P7_DU3 : FuncUnit;
+def P7_DU4 : FuncUnit;
+def P7_DU5 : FuncUnit;
+def P7_DU6 : FuncUnit;
+
+def P7_LS1 : FuncUnit; // Load/Store pipeline 1
+def P7_LS2 : FuncUnit; // Load/Store pipeline 2
+
+def P7_FX1 : FuncUnit; // FX pipeline 1
+def P7_FX2 : FuncUnit; // FX pipeline 2
+
+// VS pipeline 1 (vector integer ops. always here)
+def P7_VS1 : FuncUnit; // VS pipeline 1
+// VS pipeline 2 (128-bit stores and perms. here)
+def P7_VS2 : FuncUnit; // VS pipeline 2
+
+def P7_CRU : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P7_BRU : FuncUnit; // BR unit
+
+// Notes:
+// Each LSU pipeline can also execute FX add and logical instructions.
+// Each LSU pipeline can complete a load or store in one cycle.
+//
+// Each store is broken into two parts, AGEN goes to the LSU while a
+// "data steering" op. goes to the FXU or VSU.
+//
+// FX loads have a two cycle load-to-use latency (so one "bubble" cycle).
+// VSU loads have a three cycle load-to-use latency (so two "bubble" cycle).
+//
+// Frequent FX ops. take only one cycle and results can be used again in the
+// next cycle (there is a self-bypass). Getting results from the other FX
+// pipeline takes an additional cycle.
+//
+// The VSU XS is similar to the POWER6, but with a pipeline length of 2 cycles
+// (instead of 3 cycles on the POWER6). VSU XS handles vector FX-style ops.
+// Dispatch of an instruction to VS1 that uses four single prec. inputs
+// (either to a float or XC op). prevents dispatch in that cycle to VS2 of any
+// floating point instruction.
+//
+// The VSU PM is similar to the POWER6, but with a pipeline length of 3 cycles
+// (instead of 4 cycles on the POWER6). vsel is handled by the PM pipeline
+// (unlike on the POWER6).
+//
+// FMA from the VSUs can forward results in 6 cycles. VS1 XS and vector FP
+// share the same write-back, and have a 5-cycle latency difference, so the
+// IFU/IDU will not dispatch an XS instructon 5 cycles after a vector FP
+// op. has been dispatched to VS1.
+//
+// Three cycles after an L1 cache hit, a dependent VSU instruction can issue.
+//
+// Instruction dispatch groups have (at most) four non-branch instructions, and
+// two branches. Unlike on the POWER4/5, a branch does not automatically
+// end the dispatch group, but a second branch must be the last in the group.
+
+def P7Itineraries : ProcessorItineraries<
+ [P7_DU1, P7_DU2, P7_DU3, P7_DU4, P7_DU5, P7_DU6,
+ P7_LS1, P7_LS2, P7_FX1, P7_FX2, P7_VS1, P7_VS2, P7_CRU, P7_BRU], [], [
+ InstrItinData<IIC_IntSimple , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2,
+ P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ // FIXME: Add record-form itinerary data.
+ InstrItinData<IIC_IntDivW , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<36, [P7_FX1, P7_FX2]>],
+ [36, 1, 1]>,
+ InstrItinData<IIC_IntDivD , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<68, [P7_FX1, P7_FX2]>],
+ [68, 1, 1]>,
+ InstrItinData<IIC_IntMulHW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulHWU , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntMulLI , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 1, 1]>,
+ InstrItinData<IIC_IntRotate , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntRotateD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntShift , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_IntTrapW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_IntTrapD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1]>,
+ InstrItinData<IIC_BrB , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCR , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU5, P7_DU6], 0>,
+ InstrStage<1, [P7_BRU]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLoad , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpd , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLoadUpdX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStLDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 2, 1, 1]>,
+ InstrItinData<IIC_LdStLDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLVecX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLFDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 3, 1, 1]>,
+ InstrItinData<IIC_LdStLHA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLHAU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLHAUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [4, 4, 1, 1]>,
+ InstrItinData<IIC_LdStLWA , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLWARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLDARX, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [3, 1, 1]>,
+ InstrItinData<IIC_LdStLMW , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_LdStStore , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDUX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTFDU , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_FX1, P7_FX2], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<IIC_LdStSTVEBX , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTDCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_LdStSTWCX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_LS1, P7_LS2]>],
+ [1, 1, 1]>,
+ InstrItinData<IIC_BrMCRX , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_DU2], 0>,
+ InstrStage<1, [P7_DU3], 0>,
+ InstrStage<1, [P7_DU4], 0>,
+ InstrStage<1, [P7_CRU]>,
+ InstrStage<1, [P7_FX1, P7_FX2]>],
+ [3, 1]>, // mtcr
+ InstrItinData<IIC_SprMFCR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [6, 1]>,
+ InstrItinData<IIC_SprMFCRF , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_CRU]>],
+ [3, 1]>,
+ InstrItinData<IIC_SprMTSPR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_FX1]>],
+ [4, 1]>, // mtctr
+ InstrItinData<IIC_FPGeneral , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_FPCompare , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [8, 1, 1]>,
+ InstrItinData<IIC_FPDivD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [33, 1, 1]>,
+ InstrItinData<IIC_FPDivS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [27, 1, 1]>,
+ InstrItinData<IIC_FPSqrtD , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [44, 1, 1]>,
+ InstrItinData<IIC_FPSqrtS , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [32, 1, 1]>,
+ InstrItinData<IIC_FPFused , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1, 1]>,
+ InstrItinData<IIC_FPRes , [InstrStage<1, [P7_DU1, P7_DU2,
+ P7_DU3, P7_DU4], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [5, 1, 1]>,
+ InstrItinData<IIC_VecGeneral , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSL , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecVSR , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [2, 1, 1]>,
+ InstrItinData<IIC_VecFP , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPCompare, [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecFPRound , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1, P7_VS2]>],
+ [6, 1, 1]>,
+ InstrItinData<IIC_VecComplex , [InstrStage<1, [P7_DU1], 0>,
+ InstrStage<1, [P7_VS1]>],
+ [7, 1, 1]>,
+ InstrItinData<IIC_VecPerm , [InstrStage<1, [P7_DU1, P7_DU2], 0>,
+ InstrStage<1, [P7_VS2]>],
+ [3, 1, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// P7 machine model for scheduling and other instruction cost heuristics.
+
+def P7Model : SchedMachineModel {
+ let IssueWidth = 6; // 4 (non-branch) instructions are dispatched per cycle.
+ // Note that the dispatch bundle size is 6 (including
+ // branches), but the total internal issue bandwidth per
+ // cycle (from all queues) is 8.
+
+ let MinLatency = 0; // Out-of-order dispatch.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overriden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 16;
+
+ let Itineraries = P7Itineraries;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..dc1674214769
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -0,0 +1,22 @@
+//===-- PPCSelectionDAGInfo.cpp - PowerPC SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPCSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL)
+ : TargetSelectionDAGInfo(DL) {}
+
+PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
new file mode 100644
index 000000000000..b2e7f3b5f2ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- PPCSelectionDAGInfo.h - PowerPC SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCCSELECTIONDAGINFO_H
+#define POWERPCCSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class PPCTargetMachine;
+
+class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit PPCSelectionDAGInfo(const DataLayout *DL);
+ ~PPCSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
new file mode 100644
index 000000000000..b51512d335fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -0,0 +1,277 @@
+//===-- PowerPCSubtarget.cpp - PPC Subtarget Information ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCSubtarget.h"
+#include "PPC.h"
+#include "PPCRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-subtarget"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
+/// Return the datalayout string of a subtarget.
+static std::string getDataLayoutString(const PPCSubtarget &ST) {
+ const Triple &T = ST.getTargetTriple();
+
+ std::string Ret;
+
+ // Most PPC* platforms are big endian, PPC64LE is little endian.
+ if (ST.isLittleEndian())
+ Ret = "e";
+ else
+ Ret = "E";
+
+ Ret += DataLayout::getManglingComponent(T);
+
+ // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
+ // pointers.
+ if (!ST.isPPC64() || T.getOS() == Triple::Lv2)
+ Ret += "-p:32:32";
+
+ // Note, the alignment values for f64 and i64 on ppc64 in Darwin
+ // documentation are wrong; these are correct (i.e. "what gcc does").
+ if (ST.isPPC64() || ST.isSVR4ABI())
+ Ret += "-i64:64";
+ else
+ Ret += "-f64:32:64";
+
+ // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
+ if (ST.isPPC64())
+ Ret += "-n32:64";
+ else
+ Ret += "-n32";
+
+ return Ret;
+}
+
+PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU,
+ StringRef FS) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ return *this;
+}
+
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, PPCTargetMachine &TM,
+ bool is64Bit, CodeGenOpt::Level OptLevel)
+ : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
+ OptLevel(OptLevel),
+ FrameLowering(initializeSubtargetDependencies(CPU, FS)),
+ DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this),
+ TLInfo(TM), TSInfo(&DL) {}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+ // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
+}
+
+void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+
+void PPCSubtarget::initializeEnvironment() {
+ StackAlignment = 16;
+ DarwinDirective = PPC::DIR_NONE;
+ HasMFOCRF = false;
+ Has64BitSupport = false;
+ Use64BitRegs = false;
+ UseCRBits = false;
+ HasAltivec = false;
+ HasQPX = false;
+ HasVSX = false;
+ HasFCPSGN = false;
+ HasFSQRT = false;
+ HasFRE = false;
+ HasFRES = false;
+ HasFRSQRTE = false;
+ HasFRSQRTES = false;
+ HasRecipPrec = false;
+ HasSTFIWX = false;
+ HasLFIWAX = false;
+ HasFPRND = false;
+ HasFPCVT = false;
+ HasISEL = false;
+ HasPOPCNTD = false;
+ HasLDBRX = false;
+ IsBookE = false;
+ DeprecatedMFTB = false;
+ DeprecatedDST = false;
+ HasLazyResolverStubs = false;
+ IsJITCodeModel = false;
+}
+
+void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
+ // Determine default and user specified characteristics
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
+#if (defined(__APPLE__) || defined(__linux__)) && \
+ (defined(__ppc__) || defined(__powerpc__))
+ if (CPUName == "generic")
+ CPUName = sys::getHostCPUName();
+#endif
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
+
+ // Make sure 64-bit features are available when CPUname is generic
+ std::string FullFS = FS;
+
+ // If we are generating code for ppc64, verify that options make sense.
+ if (IsPPC64) {
+ Has64BitSupport = true;
+ // Silently force 64-bit register use on ppc64.
+ Use64BitRegs = true;
+ if (!FullFS.empty())
+ FullFS = "+64bit," + FullFS;
+ else
+ FullFS = "+64bit";
+ }
+
+ // At -O2 and above, track CR bits as individual registers.
+ if (OptLevel >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+
+ // Parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+
+ // If the user requested use of 64-bit regs, but the cpu selected doesn't
+ // support it, ignore.
+ if (use64BitRegs() && !has64BitSupport())
+ Use64BitRegs = false;
+
+ // Set up darwin-specific properties.
+ if (isDarwin())
+ HasLazyResolverStubs = true;
+
+ // QPX requires a 32-byte aligned stack. Note that we need to do this if
+ // we're compiling for a BG/Q system regardless of whether or not QPX
+ // is enabled because external functions will assume this alignment.
+ if (hasQPX() || isBGQ())
+ StackAlignment = 32;
+
+ // Determine endianness.
+ IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
+
+ // FIXME: For now, we disable VSX in little-endian mode until endian
+ // issues in those instructions can be addressed.
+ if (IsLittleEndian)
+ HasVSX = false;
+}
+
+/// hasLazyResolverStub - Return true if accesses to the specified global have
+/// to go through a dyld lazy resolution stub. This means that an extra load
+/// is required to get the address of the global.
+bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+ // We never have stubs if HasLazyResolverStubs=false or if in static mode.
+ if (!HasLazyResolverStubs || TM.getRelocationModel() == Reloc::Static)
+ return false;
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
+ bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ if (GV->hasHiddenVisibility() && !isDecl && !GV->hasCommonLinkage())
+ return false;
+ return GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
+ GV->hasCommonLinkage() || isDecl;
+}
+
+// Embedded cores need aggressive scheduling (and some others also benefit).
+static bool needsAggressiveScheduling(unsigned Directive) {
+ switch (Directive) {
+ default: return false;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ case PPC::DIR_PWR7:
+ case PPC::DIR_PWR8:
+ return true;
+ }
+}
+
+bool PPCSubtarget::enableMachineScheduler() const {
+ // Enable MI scheduling for the embedded cores.
+ // FIXME: Enable this for all cores (some additional modeling
+ // may be necessary).
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+ return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(isPPC64() ?
+ &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
+void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (needsAggressiveScheduling(DarwinDirective)) {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+
+ // Spilling is generally expensive on all PPC cores, so always enable
+ // register-pressure tracking.
+ Policy.ShouldTrackPressure = true;
+}
+
+bool PPCSubtarget::useAA() const {
+ // Use AA during code generation for the embedded cores.
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
new file mode 100644
index 000000000000..a3cedafb5ef2
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -0,0 +1,251 @@
+//===-- PPCSubtarget.h - Define Subtarget for the PPC ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef POWERPCSUBTARGET_H
+#define POWERPCSUBTARGET_H
+
+#include "PPCFrameLowering.h"
+#include "PPCInstrInfo.h"
+#include "PPCISelLowering.h"
+#include "PPCJITInfo.h"
+#include "PPCSelectionDAGInfo.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
+// GCC #defines PPC on Linux but we use it as our namespace name
+#undef PPC
+
+namespace llvm {
+class StringRef;
+
+namespace PPC {
+ // -m directive values.
+ enum {
+ DIR_NONE,
+ DIR_32,
+ DIR_440,
+ DIR_601,
+ DIR_602,
+ DIR_603,
+ DIR_7400,
+ DIR_750,
+ DIR_970,
+ DIR_A2,
+ DIR_E500mc,
+ DIR_E5500,
+ DIR_PWR3,
+ DIR_PWR4,
+ DIR_PWR5,
+ DIR_PWR5X,
+ DIR_PWR6,
+ DIR_PWR6X,
+ DIR_PWR7,
+ DIR_PWR8,
+ DIR_64
+ };
+}
+
+class GlobalValue;
+class TargetMachine;
+
+class PPCSubtarget : public PPCGenSubtargetInfo {
+protected:
+ /// stackAlignment - The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which cpu directive was used.
+ unsigned DarwinDirective;
+
+ /// Used by the ISel to turn in optimizations for POWER4-derived architectures
+ bool HasMFOCRF;
+ bool Has64BitSupport;
+ bool Use64BitRegs;
+ bool UseCRBits;
+ bool IsPPC64;
+ bool HasAltivec;
+ bool HasQPX;
+ bool HasVSX;
+ bool HasFCPSGN;
+ bool HasFSQRT;
+ bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
+ bool HasRecipPrec;
+ bool HasSTFIWX;
+ bool HasLFIWAX;
+ bool HasFPRND;
+ bool HasFPCVT;
+ bool HasISEL;
+ bool HasPOPCNTD;
+ bool HasLDBRX;
+ bool IsBookE;
+ bool DeprecatedMFTB;
+ bool DeprecatedDST;
+ bool HasLazyResolverStubs;
+ bool IsJITCodeModel;
+ bool IsLittleEndian;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
+
+ /// OptLevel - What default optimization level we're emitting code for.
+ CodeGenOpt::Level OptLevel;
+
+ PPCFrameLowering FrameLowering;
+ const DataLayout DL;
+ PPCInstrInfo InstrInfo;
+ PPCJITInfo JITInfo;
+ PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
+
+public:
+ /// This constructor initializes the data members to match that
+ /// of the specified triple.
+ ///
+ PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, PPCTargetMachine &TM, bool is64Bit,
+ CodeGenOpt::Level OptLevel);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+ /// getStackAlignment - Returns the minimum alignment known to hold of the
+ /// stack frame on entry to the function and which must be maintained by every
+ /// function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+ /// getDarwinDirective - Returns the -m directive specified for the cpu.
+ ///
+ unsigned getDarwinDirective() const { return DarwinDirective; }
+
+ /// getInstrItins - Return the instruction itineraries based on subtarget
+ /// selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+ const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; }
+ const DataLayout *getDataLayout() const { return &DL; }
+ const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
+ PPCJITInfo *getJITInfo() { return &JITInfo; }
+ const PPCTargetLowering *getTargetLowering() const { return &TLInfo; }
+ const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+
+ /// initializeSubtargetDependencies - Initializes using a CPU and feature string
+ /// so that we can use initializer lists for subtarget initialization.
+ PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
+
+ /// \brief Reset the features for the PowerPC target.
+ void resetSubtargetFeatures(const MachineFunction *MF) override;
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+
+public:
+ /// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
+ ///
+ bool isPPC64() const { return IsPPC64; }
+
+ /// has64BitSupport - Return true if the selected CPU supports 64-bit
+ /// instructions, regardless of whether we are in 32-bit or 64-bit mode.
+ bool has64BitSupport() const { return Has64BitSupport; }
+
+ /// use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit
+ /// registers in 32-bit mode when possible. This can only true if
+ /// has64BitSupport() returns true.
+ bool use64BitRegs() const { return Use64BitRegs; }
+
+ /// useCRBits - Return true if we should store and manipulate i1 values in
+ /// the individual condition register bits.
+ bool useCRBits() const { return UseCRBits; }
+
+ /// hasLazyResolverStub - Return true if accesses to the specified global have
+ /// to go through a dyld lazy resolution stub. This means that an extra load
+ /// is required to get the address of the global.
+ bool hasLazyResolverStub(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ // isJITCodeModel - True if we're generating code for the JIT
+ bool isJITCodeModel() const { return IsJITCodeModel; }
+
+ // isLittleEndian - True if generating little-endian code
+ bool isLittleEndian() const { return IsLittleEndian; }
+
+ // Specific obvious features.
+ bool hasFCPSGN() const { return HasFCPSGN; }
+ bool hasFSQRT() const { return HasFSQRT; }
+ bool hasFRE() const { return HasFRE; }
+ bool hasFRES() const { return HasFRES; }
+ bool hasFRSQRTE() const { return HasFRSQRTE; }
+ bool hasFRSQRTES() const { return HasFRSQRTES; }
+ bool hasRecipPrec() const { return HasRecipPrec; }
+ bool hasSTFIWX() const { return HasSTFIWX; }
+ bool hasLFIWAX() const { return HasLFIWAX; }
+ bool hasFPRND() const { return HasFPRND; }
+ bool hasFPCVT() const { return HasFPCVT; }
+ bool hasAltivec() const { return HasAltivec; }
+ bool hasQPX() const { return HasQPX; }
+ bool hasVSX() const { return HasVSX; }
+ bool hasMFOCRF() const { return HasMFOCRF; }
+ bool hasISEL() const { return HasISEL; }
+ bool hasPOPCNTD() const { return HasPOPCNTD; }
+ bool hasLDBRX() const { return HasLDBRX; }
+ bool isBookE() const { return IsBookE; }
+ bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
+ bool isDeprecatedDST() const { return DeprecatedDST; }
+
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ /// isDarwin - True if this is any darwin platform.
+ bool isDarwin() const { return TargetTriple.isMacOSX(); }
+ /// isBGQ - True if this is a BG/Q platform.
+ bool isBGQ() const { return TargetTriple.getVendor() == Triple::BGQ; }
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
+ bool isDarwinABI() const { return isDarwin(); }
+ bool isSVR4ABI() const { return !isDarwin(); }
+ /// FIXME: Should use a command-line option.
+ bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() &&
+ isLittleEndian(); }
+
+ bool enableEarlyIfConversion() const override { return hasISEL(); }
+
+ // Scheduling customization.
+ bool enableMachineScheduler() const override;
+ // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+ bool enablePostMachineScheduler() const override;
+ AntiDepBreakMode getAntiDepBreakMode() const override;
+ void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const override;
+ bool useAA() const override;
+};
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
new file mode 100644
index 000000000000..9563b9045c39
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -0,0 +1,170 @@
+//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the PowerPC target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetMachine.h"
+#include "PPC.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::
+opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
+ cl::desc("Disable CTR loops for PPC"));
+
+static cl::opt<bool>
+VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
+ cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
+
+extern "C" void LLVMInitializePowerPCTarget() {
+ // Register the targets
+ RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
+ RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
+ RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
+}
+
+PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64Bit)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, *this, is64Bit, OL) {
+ initAsmInfo();
+}
+
+void PPC32TargetMachine::anchor() { }
+
+PPC32TargetMachine::PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
+}
+
+void PPC64TargetMachine::anchor() { }
+
+PPC64TargetMachine::PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
+}
+
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// PPC Code Generator Pass Configuration Options.
+class PPCPassConfig : public TargetPassConfig {
+public:
+ PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ PPCTargetMachine &getPPCTargetMachine() const {
+ return getTM<PPCTargetMachine>();
+ }
+
+ const PPCSubtarget &getPPCSubtarget() const {
+ return *getPPCTargetMachine().getSubtargetImpl();
+ }
+
+ bool addPreISel() override;
+ bool addILPOpts() override;
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPreSched2() override;
+ bool addPreEmitPass() override;
+};
+} // namespace
+
+TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new PPCPassConfig(this, PM);
+}
+
+bool PPCPassConfig::addPreISel() {
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoops(getPPCTargetMachine()));
+
+ return false;
+}
+
+bool PPCPassConfig::addILPOpts() {
+ addPass(&EarlyIfConverterID);
+ return true;
+}
+
+bool PPCPassConfig::addInstSelector() {
+ // Install an instruction selector.
+ addPass(createPPCISelDag(getPPCTargetMachine()));
+
+#ifndef NDEBUG
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoopsVerify());
+#endif
+
+ addPass(createPPCVSXCopyPass());
+ return false;
+}
+
+bool PPCPassConfig::addPreRegAlloc() {
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+ &PPCVSXFMAMutateID);
+ return false;
+}
+
+bool PPCPassConfig::addPreSched2() {
+ addPass(createPPCVSXCopyCleanupPass());
+
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&IfConverterID);
+
+ return true;
+}
+
+bool PPCPassConfig::addPreEmitPass() {
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCEarlyReturnPass());
+ // Must run branch selection immediately preceding the asm printer.
+ addPass(createPPCBranchSelectionPass());
+ return false;
+}
+
+bool PPCTargetMachine::addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) {
+ // Inform the subtarget that we are in JIT mode. FIXME: does this break macho
+ // writing?
+ Subtarget.SetJITMode();
+
+ // Machine code emitter pass for PowerPC.
+ PM.add(createPPCJITCodeEmitterPass(*this, JCE));
+
+ return false;
+}
+
+void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our PPC pass. This
+ // allows the PPC pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createPPCTargetTransformInfoPass(this));
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
new file mode 100644
index 000000000000..4c7029ca7a36
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.h
@@ -0,0 +1,93 @@
+//===-- PPCTargetMachine.h - Define TargetMachine for PowerPC ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PowerPC specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPC_TARGETMACHINE_H
+#define PPC_TARGETMACHINE_H
+
+#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// PPCTargetMachine - Common code between 32-bit and 64-bit PowerPC targets.
+///
+class PPCTargetMachine : public LLVMTargetMachine {
+ PPCSubtarget Subtarget;
+
+public:
+ PPCTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL, bool is64Bit);
+
+ const PPCInstrInfo *getInstrInfo() const override {
+ return getSubtargetImpl()->getInstrInfo();
+ }
+ const PPCFrameLowering *getFrameLowering() const override {
+ return getSubtargetImpl()->getFrameLowering();
+ }
+ PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); }
+ const PPCTargetLowering *getTargetLowering() const override {
+ return getSubtargetImpl()->getTargetLowering();
+ }
+ const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
+ return getSubtargetImpl()->getSelectionDAGInfo();
+ }
+ const PPCRegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
+ }
+
+ const DataLayout *getDataLayout() const override {
+ return getSubtargetImpl()->getDataLayout();
+ }
+ const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &getSubtargetImpl()->getInstrItineraryData();
+ }
+
+ // Pass Pipeline Configuration
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ bool addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) override;
+
+ /// \brief Register PPC analysis passes with a pass manager.
+ void addAnalysisPasses(PassManagerBase &PM) override;
+};
+
+/// PPC32TargetMachine - PowerPC 32-bit target machine.
+///
+class PPC32TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
+public:
+ PPC32TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+/// PPC64TargetMachine - PowerPC 64-bit target machine.
+///
+class PPC64TargetMachine : public PPCTargetMachine {
+ virtual void anchor();
+public:
+ PPC64TargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
new file mode 100644
index 000000000000..2903cc192aa8
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -0,0 +1,63 @@
+//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetObjectFile.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+
+using namespace llvm;
+
+void
+PPC64LinuxTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
+const MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
+ const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
+ // when we have a constant that contains global relocations. This is
+ // necessary because of this ABI's handling of pointers to functions in
+ // a shared library. The address of a function is actually the address
+ // of a function descriptor, which resides in the .opd section. Generated
+ // code uses the descriptor directly rather than going via the GOT as some
+ // other ABIs do, which means that initialized function pointers must
+ // reference the descriptor. The linker must convert copy relocs of
+ // pointers to functions in shared libraries into dynamic relocations,
+ // because of an ordering problem with initialization of copy relocs and
+ // PLT entries. The dynamic relocation will be initialized by the dynamic
+ // linker, so we must use DataRelROSection instead of ReadOnlySection.
+ // For more information, see the description of ELIMINATE_COPY_RELOCS in
+ // GNU ld.
+ if (Kind.isReadOnly()) {
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ if (GVar && GVar->isConstant() &&
+ (GVar->getInitializer()->getRelocationInfo() ==
+ Constant::GlobalRelocations))
+ Kind = SectionKind::getReadOnlyWithRel();
+ }
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
+ Mang, TM);
+}
+
+const MCExpr *PPC64LinuxTargetObjectFile::
+getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext());
+ return MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(0x8000, getContext()),
+ getContext());
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
new file mode 100644
index 000000000000..3e71bbc67379
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -0,0 +1,35 @@
+//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ /// PPC64LinuxTargetObjectFile - This implementation is used for
+ /// 64-bit PowerPC Linux.
+ class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const override;
+
+ /// \brief Describe a TLS variable address within debug info.
+ const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
new file mode 100644
index 000000000000..73fb69101353
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -0,0 +1,27 @@
+//===-- PPCTargetStreamer.h - PPC Target Streamer --s-----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETSTREAMER_H
+#define PPCTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class PPCTargetStreamer : public MCTargetStreamer {
+public:
+ PPCTargetStreamer(MCStreamer &S);
+ virtual ~PPCTargetStreamer();
+ virtual void emitTCEntry(const MCSymbol &S) = 0;
+ virtual void emitMachine(StringRef CPU) = 0;
+ virtual void emitAbiVersion(int AbiVersion) = 0;
+ virtual void emitLocalEntry(MCSymbol *S, const MCExpr *LocalOffset) = 0;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
new file mode 100644
index 000000000000..007901b23e0c
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -0,0 +1,420 @@
+//===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// PPC target machine. It uses the target's detailed information to provide
+/// more precise answers to certain TTI queries, while letting the target
+/// independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "PPCTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/CostTable.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppctti"
+
+static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
+cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
+
+// Declare the pass initialization routine locally as target-specific passes
+// don't have a target-wide initialization entry point, and so we rely on the
+// pass constructor initialization.
+namespace llvm {
+void initializePPCTTIPass(PassRegistry &);
+}
+
+namespace {
+
+class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
+ const PPCSubtarget *ST;
+ const PPCTargetLowering *TLI;
+
+public:
+ PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
+ llvm_unreachable("This pass cannot be directly constructed");
+ }
+
+ PPCTTI(const PPCTargetMachine *TM)
+ : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
+ TLI(TM->getTargetLowering()) {
+ initializePPCTTIPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void initializePass() override {
+ pushTTIStack(this);
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ TargetTransformInfo::getAnalysisUsage(AU);
+ }
+
+ /// Pass identification.
+ static char ID;
+
+ /// Provide necessary pointer adjustments for the two base classes.
+ virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ if (ID == &TargetTransformInfo::ID)
+ return (TargetTransformInfo*)this;
+ return this;
+ }
+
+ /// \name Scalar TTI Implementations
+ /// @{
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+
+ virtual PopcntSupportKind
+ getPopcntSupport(unsigned TyWidth) const override;
+ virtual void getUnrollingPreferences(
+ Loop *L, UnrollingPreferences &UP) const override;
+
+ /// @}
+
+ /// \name Vector TTI Implementations
+ /// @{
+
+ virtual unsigned getNumberOfRegisters(bool Vector) const override;
+ virtual unsigned getRegisterBitWidth(bool Vector) const override;
+ virtual unsigned getMaximumUnrollFactor() const override;
+ virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind,
+ OperandValueKind) const override;
+ virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
+ int Index, Type *SubTp) const override;
+ virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const override;
+ virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const override;
+ virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const override;
+ virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const override;
+
+ /// @}
+};
+
+} // end anonymous namespace
+
+INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti",
+ "PPC Target Transform Info", true, true, false)
+char PPCTTI::ID = 0;
+
+ImmutablePass *
+llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) {
+ return new PPCTTI(TM);
+}
+
+
+//===----------------------------------------------------------------------===//
+//
+// PPC cost model.
+//
+//===----------------------------------------------------------------------===//
+
+PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (ST->hasPOPCNTD() && TyWidth <= 64)
+ return PSK_FastHardware;
+ return PSK_Software;
+}
+
+unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ if (Imm == 0)
+ return TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Basic;
+
+ if (isInt<32>(Imm.getSExtValue())) {
+ // A constant that can be materialized using lis.
+ if ((Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Basic;
+
+ return 2 * TCC_Basic;
+ }
+ }
+
+ return 4 * TCC_Basic;
+}
+
+unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ switch (IID) {
+ default: return TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+ break;
+ }
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
+
+unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ unsigned ImmIdx = ~0U;
+ bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
+ ZeroFree = false;
+ switch (Opcode) {
+ default: return TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TCC_Basic;
+ return TCC_Free;
+ case Instruction::And:
+ RunFree = true; // (for the rotate-and-mask instructions)
+ // Fallthrough...
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Xor:
+ ShiftedFree = true;
+ // Fallthrough...
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ ImmIdx = 1;
+ break;
+ case Instruction::ICmp:
+ UnsignedFree = true;
+ ImmIdx = 1;
+ // Fallthrough... (zero comparisons can use record-form instructions)
+ case Instruction::Select:
+ ZeroFree = true;
+ break;
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Store:
+ break;
+ }
+
+ if (ZeroFree && Imm == 0)
+ return TCC_Free;
+
+ if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+
+ if (RunFree) {
+ if (Imm.getBitWidth() <= 32 &&
+ (isShiftedMask_32(Imm.getZExtValue()) ||
+ isShiftedMask_32(~Imm.getZExtValue())))
+ return TCC_Free;
+
+
+ if (ST->isPPC64() &&
+ (isShiftedMask_64(Imm.getZExtValue()) ||
+ isShiftedMask_64(~Imm.getZExtValue())))
+ return TCC_Free;
+ }
+
+ if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
+ return TCC_Free;
+
+ if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Free;
+ }
+
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
+
+void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
+ if (ST->getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 is in-order with a deep pipeline, and concatenation unrolling
+ // helps expose latency-hiding opportunities to the instruction scheduler.
+ UP.Partial = UP.Runtime = true;
+ }
+}
+
+unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
+ if (Vector && !ST->hasAltivec())
+ return 0;
+ return ST->hasVSX() ? 64 : 32;
+}
+
+unsigned PPCTTI::getRegisterBitWidth(bool Vector) const {
+ if (Vector) {
+ if (ST->hasAltivec()) return 128;
+ return 0;
+ }
+
+ if (ST->isPPC64())
+ return 64;
+ return 32;
+
+}
+
+unsigned PPCTTI::getMaximumUnrollFactor() const {
+ unsigned Directive = ST->getDarwinDirective();
+ // The 440 has no SIMD support, but floating-point instructions
+ // have a 5-cycle latency, so unroll by 5x for latency hiding.
+ if (Directive == PPC::DIR_440)
+ return 5;
+
+ // The A2 has no SIMD support, but floating-point instructions
+ // have a 6-cycle latency, so unroll by 6x for latency hiding.
+ if (Directive == PPC::DIR_A2)
+ return 6;
+
+ // FIXME: For lack of any better information, do no harm...
+ if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500)
+ return 1;
+
+ // For most things, modern systems have two execution units (and
+ // out-of-order execution).
+ return 2;
+}
+
+unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Op1Info,
+ OperandValueKind Op2Info) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ // Fallback to the default implementation.
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info,
+ Op2Info);
+}
+
+unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
+ Type *SubTp) const {
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+}
+
+unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
+ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode");
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
+
+unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ if (ST->hasVSX() && Val->getScalarType()->isDoubleTy()) {
+ // Double-precision scalars are already located in index #0.
+ if (Index == 0)
+ return 0;
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+ }
+
+ // Estimated cost of a load-hit-store delay. This was obtained
+ // experimentally as a minimum needed to prevent unprofitable
+ // vectorization for the paq8p benchmark. It may need to be
+ // raised further if other unprofitable cases remain.
+ unsigned LHSPenalty = 2;
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ LHSPenalty += 7;
+
+ // Vector element insert/extract with Altivec is very expensive,
+ // because they require store and reload with the attendant
+ // processor stall for load-hit-store. Until VSX is available,
+ // these need to be estimated as very costly.
+ if (ISD == ISD::EXTRACT_VECTOR_ELT ||
+ ISD == ISD::INSERT_VECTOR_ELT)
+ return LHSPenalty +
+ TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+
+ return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index);
+}
+
+unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+ assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+ "Invalid Opcode");
+
+ unsigned Cost =
+ TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+
+ // VSX loads/stores support unaligned access.
+ if (ST->hasVSX()) {
+ if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
+ return Cost;
+ }
+
+ bool UnalignedAltivec =
+ Src->isVectorTy() &&
+ Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
+ LT.second.getSizeInBits() == 128 &&
+ Opcode == Instruction::Load;
+
+ // PPC in general does not support unaligned loads and stores. They'll need
+ // to be decomposed based on the alignment factor.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
+ Cost += LT.first*(SrcBytes/Alignment-1);
+
+ // For a vector type, there is also scalarization overhead (only for
+ // stores, loads are expanded using the vector-load + permutation sequence,
+ // which is much less expensive).
+ if (Src->isVectorTy() && Opcode == Instruction::Store)
+ for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+ }
+
+ return Cost;
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
new file mode 100644
index 000000000000..5727dbc4170d
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -0,0 +1,26 @@
+//===-- PowerPCTargetInfo.cpp - PowerPC Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPC.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::ThePPC32Target, llvm::ThePPC64Target, llvm::ThePPC64LETarget;
+
+extern "C" void LLVMInitializePowerPCTargetInfo() {
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true>
+ X(ThePPC32Target, "ppc32", "PowerPC 32");
+
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
+ Y(ThePPC64Target, "ppc64", "PowerPC 64");
+
+ RegisterTarget<Triple::ppc64le, /*HasJIT=*/true>
+ Z(ThePPC64LETarget, "ppc64le", "PowerPC 64 LE");
+}