path: root/contrib/llvm/lib/Target/CellSPU
Diffstat (limited to 'contrib/llvm/lib/Target/CellSPU')
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td | 449
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp | 43
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h | 30
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 94
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 38
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPU.h | 31
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPU.td | 66
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td | 41
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td | 408
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp | 333
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td | 57
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp | 256
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h | 80
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp | 141
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h | 41
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1193
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp | 3259
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h | 185
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h | 43
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td | 320
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp | 453
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h | 84
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td | 4484
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp | 14
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h | 50
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td | 97
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUNodes.td | 159
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp | 153
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUOperands.td | 664
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp | 356
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h | 101
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td | 183
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h | 19
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUSchedule.td | 59
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp | 23
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h | 31
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp | 65
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h | 97
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp | 93
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h | 87
-rw-r--r--  contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20
41 files changed, 14400 insertions(+), 0 deletions(-)
diff --git a/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
new file mode 100644
index 000000000000..cdb4099ffbca
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td
@@ -0,0 +1,449 @@
+//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+///--==-- Arithmetic ops intrinsics --==--
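+// Each definition below binds one llvm.spu.si.* intrinsic (the Cell SDK
+// si_* builtins) to the matching SPU instruction. RR_Int_* and RI10_Int_*
+// are instruction-format helper classes (RR = register/register form,
+// RI10 = register plus a signed 10-bit immediate) declared elsewhere in
+// the SPU .td files.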
+def CellSDKah:
+ RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>;
+def CellSDKahi:
+ RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>;
+def CellSDKa:
+ RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>;
+def CellSDKai:
+ RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>;
+def CellSDKsfh:
+ RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>;
+def CellSDKsfhi:
+ RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>;
+def CellSDKsf:
+ RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>;
+def CellSDKsfi:
+ RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>;
+def CellSDKaddx:
+ RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>;
+def CellSDKcg:
+ RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>;
+def CellSDKcgx:
+ RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>;
+def CellSDKsfx:
+ RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>;
+def CellSDKbg:
+ RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>;
+def CellSDKbgx:
+ RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>;
+
+def CellSDKmpy:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyu:
+ RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+def CellSDKmpyi:
+ RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyi $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpyui:
+ RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "mpyui $rT, $rA, $val", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))]>;
+
+def CellSDKmpya:
+ RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "mpya $rT, $rA, $rB, $rC", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB),
+ (v8i16 VECREG:$rC)))]>;
+
+def CellSDKmpyh:
+ RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpys:
+ RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpys $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhh:
+ RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhh $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhha:
+ RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhha $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave
+// as an intrinsic for the time being
+def CellSDKmpyhhu:
+ RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhu $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKmpyhhau:
+ RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpyhhau $rT, $rA, $rB", IntegerMulDiv,
+ [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def CellSDKand:
+ RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "and\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandc:
+ RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "andc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKandbi:
+ RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "andbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKandhi:
+ RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKandi:
+ RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "andi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "or\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorc:
+ RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "addc\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "orbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "orhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKxor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "xor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKxorbi:
+ RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "xorbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKxorhi:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xorhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKxori:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "xori\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKnor:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nor\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKnand:
+ RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "nand\t $rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+//===----------------------------------------------------------------------===//
+// Shift/rotate intrinsics:
+//===----------------------------------------------------------------------===//
+
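+// Unlike the definitions above, the shift/rotate intrinsics are not new
+// instruction definitions; they are Pat<> rewrites that select onto the
+// shift instructions already defined for ordinary code generation
+// (SHLIv4i32, SHLQBIv16i8, and friends).
+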
+def CellSDKshli:
+ Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val),
+ (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqbi:
+ Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB),
+ (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqii:
+ Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val),
+ (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def CellSDKshlqby:
+ Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB),
+ (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>;
+
+def CellSDKshlqbyi:
+ Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val),
+ (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+
+//===----------------------------------------------------------------------===//
+// Branch/compare intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKceq:
+ RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceq\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKceqi:
+ RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqi\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKceqb:
+ RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKceqbi:
+ RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "ceqbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKceqh:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ceqh\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKceqhi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ceqhi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+def CellSDKcgth:
+ RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKcgthi:
+ RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKcgt:
+ RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKcgti:
+ RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "cgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKcgtb:
+ RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "cgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKcgtbi:
+ RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "cgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+def CellSDKclgth:
+ RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgth\t $rT, $rA, $rB", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>;
+
+def CellSDKclgthi:
+ RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgthi\t $rT, $rA, $val", BranchResolv,
+ [(set (v8i16 VECREG:$rT),
+ (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>;
+
+def CellSDKclgt:
+ RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgt\t $rT, $rA, $rB", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+def CellSDKclgti:
+ RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "clgti\t $rT, $rA, $val", BranchResolv,
+ [(set (v4i32 VECREG:$rT),
+ (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>;
+
+def CellSDKclgtb:
+ RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "clgtb\t $rT, $rA, $rB", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>;
+
+def CellSDKclgtbi:
+ RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val),
+ "clgtbi\t $rT, $rA, $val", BranchResolv,
+ [(set (v16i8 VECREG:$rT),
+ (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>;
+
+//===----------------------------------------------------------------------===//
+// Floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKfa:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fa\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfs:
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fs\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfm:
+ RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fm\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfceq:
+ RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fceq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcgt:
+ RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmeq:
+ RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmeq\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfcmgt:
+ RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fcmgt\t $rT, $rA, $rB", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB)))]>;
+
+def CellSDKfma:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfnms:
+ RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+def CellSDKfms:
+ RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t $rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rC)))]>;
+
+//===----------------------------------------------------------------------===//
+// Double precision floating-point intrinsics:
+//===----------------------------------------------------------------------===//
+
+def CellSDKdfa:
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfs:
+ RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfm:
+ RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfma:
+ RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnma:
+ RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnma\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfnms:
+ RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfnms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
+
+def CellSDKdfms:
+ RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfms\t $rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
new file mode 100644
index 000000000000..4bad37eacaf7
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -0,0 +1,43 @@
+//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the SPUMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCAsmInfo.h"
+using namespace llvm;
+
+void SPULinuxMCAsmInfo::anchor() { }
+
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+
+ ZeroDirective = "\t.space\t";
+ Data64bitsDirective = "\t.quad\t";
+ AlignmentIsInBytes = false;
+
+ PCSymbol = ".";
+ CommentString = "#";
+ GlobalPrefix = "";
+ PrivateGlobalPrefix = ".L";
+
+ // Has leb128
+ HasLEB128 = true;
+
+ SupportsDebugInformation = true;
+
+ // Exception handling is not supported on CellSPU (think about it: you only
+ // have 256K for code+data. Would you support exception handling?)
+ ExceptionsType = ExceptionHandling::None;
+
+ // SPU assembly requires ".section" before ".bss"
+ UsesELFSectionDirectiveForBSS = true;
+}
+
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
new file mode 100644
index 000000000000..f786147b9267
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
@@ -0,0 +1,30 @@
+//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the SPUMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUTARGETASMINFO_H
+#define SPUTARGETASMINFO_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+
+ class SPULinuxMCAsmInfo : public MCAsmInfo {
+ virtual void anchor();
+ public:
+ explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
+ };
+} // namespace llvm
+
+#endif /* SPUTARGETASMINFO_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
new file mode 100644
index 000000000000..8450e2c6634c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -0,0 +1,94 @@
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCTargetDesc.h"
+#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCCodeGenInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSPUMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitSPUMCRegisterInfo(X, SPU::R0);
+ return X;
+}
+
+static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) {
+ MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT);
+
+ // Initial state of the frame pointer is R1.
+ MachineLocation Dst(MachineLocation::VirtualFP);
+ MachineLocation Src(SPU::R1, 0);
+ MAI->addInitialFrameState(0, Dst, Src);
+
+ return MAI;
+}
+
+static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL) {
+ MCCodeGenInfo *X = new MCCodeGenInfo();
+ // For the time being, use static relocations, since there's really no
+ // support for PIC yet.
+ X->InitMCCodeGenInfo(Reloc::Static, CM, OL);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo);
+
+ // Register the MC codegen info.
+ TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget,
+ createSPUMCCodeGenInfo);
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget,
+ createCellSPUMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+ createSPUMCSubtargetInfo);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
new file mode 100644
index 000000000000..d26449e8908f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides CellSPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUMCTARGETDESC_H
+#define SPUMCTARGETDESC_H
+
+namespace llvm {
+class Target;
+
+extern Target TheCellSPUTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPU instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPUGenSubtargetInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.h b/contrib/llvm/lib/Target/CellSPU/SPU.h
new file mode 100644
index 000000000000..c660131706cb
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.h
@@ -0,0 +1,31 @@
+//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// Cell SPU back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_IBMCELLSPU_H
+#define LLVM_TARGET_IBMCELLSPU_H
+
+#include "MCTargetDesc/SPUMCTargetDesc.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUTargetMachine;
+ class FunctionPass;
+ class formatted_raw_ostream;
+
+ FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
+ FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
+
+}
+
+#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.td b/contrib/llvm/lib/Target/CellSPU/SPU.td
new file mode 100644
index 000000000000..e835b9cac8e1
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU.td
@@ -0,0 +1,66 @@
+//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the top level entry point for the STI Cell SPU target machine.
+//
+//===----------------------------------------------------------------------===//
+
+// Get the target-independent interfaces which we are implementing.
+//
+include "llvm/Target/Target.td"
+
+// Holder of code fragments (you'd think this'd already be in
+// a td file somewhere... :-)
+
+class CodeFrag<dag frag> {
+ dag Fragment = frag;
+}
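+
+// A CodeFrag names a dag once so it can be spliced into several Pat<>
+// definitions via <name>.Fragment (used heavily by SPU64InstrInfo.td).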
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "SPURegisterInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction formats, instructions
+//===----------------------------------------------------------------------===//
+
+include "SPUNodes.td"
+include "SPUOperands.td"
+include "SPUSchedule.td"
+include "SPUInstrFormats.td"
+include "SPUInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// Subtarget features:
+//===----------------------------------------------------------------------===//
+
+def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">;
+def LargeMemFeature:
+ SubtargetFeature<"large_mem","UseLargeMem", "true",
+ "Use large (>256) LSA memory addressing [default = false]">;
+
+def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>;
+
+//===----------------------------------------------------------------------===//
+// Calling convention:
+//===----------------------------------------------------------------------===//
+
+include "SPUCallingConv.td"
+
+// Target:
+
+def SPUInstrInfo : InstrInfo {
+ let isLittleEndianEncoding = 1;
+}
+
+def SPU : Target {
+ let InstructionSet = SPUInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 000000000000..e051e047333a
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,41 @@
+//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+//===----------------------------------------------------------------------===//
+
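+// SPU registers are 128 bits wide and quadwords are laid out big-endian,
+// so a narrower value sits in the preferred slot at the most significant
+// end. Zero extension to i128 is therefore a logical right shift by bytes:
+// ROTQMBYI ("rotate and mask quadword by bytes, immediate") moves a 32-bit
+// source down by 12 bytes (a 64-bit source by 8), filling the vacated
+// bytes with zeros.
+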
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (zext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// anyext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (anyext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// anyext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (anyext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// anyext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (anyext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// anyext 8->128: Zero extend 8-bit to 128-bit
+def : Pat<(i128 (anyext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>;
+
+// Shift left
+def : Pat<(shl GPRC:$rA, R32C:$rB),
+ (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
new file mode 100644
index 000000000000..bea33b5362d2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -0,0 +1,408 @@
+//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===//
+//
+// Cell SPU 64-bit operations
+//
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// 64-bit comparisons:
+//
+// 1. The instruction sequences for vector vice scalar differ by a
+// constant. In the scalar case, we're only interested in the
+// top two 32-bit slots, whereas we're interested in an exact
+// all-four-slot match in the vector case.
+//
+// 2. There are no "immediate" forms, since loading 64-bit constants
+// could be a constant pool load.
+//
+// 3. i64 setcc results are i32, which are subsequently converted to a FSM
+// mask when used in a select pattern.
+//
+// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
+// [Note: this may be moot, since gb produces v4i32 or r32.]
+//
+// 5. The code sequences for r64 and v2i64 are probably overly conservative,
+// compared to the code that gcc produces.
+//
+// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBr64_cond:
+ SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+// The generic i64 select pattern, which assumes that the comparison result
+// is in a 32-bit register that contains a select mask pattern (i.e., gather
+// bits result):
+
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
+
+// select the negative condition:
+class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>;
+
+// setcc the negative condition:
+class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
+ Pat<(cond R64C:$rA, R64C:$rB),
+ (XORIr32 compare.Fragment, -1)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// The i64 seteq fragment that does the scalar->vector conversion and
+// comparison:
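+//
+// (Worked example of the constants used below: CEQv4i32 sets every equal
+// word slot to all ones, and GBv4i32 gathers one bit per word slot into a
+// 4-bit value with slot 0 in the most significant position. A scalar i64
+// occupies word slots 0 and 1, so equality of those two slots yields a
+// gather value of at least 0b1100 = 12, which the "CGTI ..., 0xb" test
+// accepts; the v2i64 form requires all four slots equal, i.e. exactly 0xf.)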
+def CEQr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>;
+
+// The i64 seteq fragment that does the vector comparison
+def CEQv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>;
+
+// i64 seteq (equality): the setcc result is i32, which is converted to a
+// vector FSM mask when used in a select pattern.
+//
+// v2i64 seteq (equality): the setcc result is v4i32
+multiclass CompareEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
+}
+
+defm I64EQ: CompareEqual64;
+
+def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
+def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;
+
+// i64 setne:
+def : I64SETCCNegCond<setne, I64EQr64>;
+def : I64SELECTNegCond<setne, I64EQr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setugt/setule:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGTr64ugt:
+ CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CLGTr64compare:
+ CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTr64eq.Fragment)>;
+
+def CLGTv2i64ugt:
+ CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CLGTv2i64compare:
+ CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
+ (XSWDv2i64 CLGTr64ugt.Fragment),
+ CLGTv2i64eq.Fragment)>;
+
+multiclass CompareLogicalGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64LGT: CompareLogicalGreaterThan64;
+
+def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
+//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64LGTv2i64.Fragment>;
+
+// i64 setule:
+def : I64SETCCNegCond<setule, I64LGTr64>;
+def : I64SELECTNegCond<setule, I64LGTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setuge/setult:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CLGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment,
+ CLGTr64eq.Fragment)), 0xb)>;
+
+def CLGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
+ CLGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareLogicalGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64LGE: CompareLogicalGreaterEqual64;
+
+def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
+def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64LGEv2i64.Fragment>;
+
+
+// i64 setult:
+def : I64SETCCNegCond<setult, I64LGEr64>;
+def : I64SELECTNegCond<setult, I64LGEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setgt/setle:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGTr64sgt:
+ CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64eq:
+ CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
+
+def CGTr64compare:
+ CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTr64eq.Fragment)>;
+
+def CGTv2i64sgt:
+ CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64eq:
+ CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
+
+def CGTv2i64compare:
+ CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
+ (XSWDv2i64 CGTr64sgt.Fragment),
+ CGTv2i64eq.Fragment)>;
+
+multiclass CompareGreaterThan64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGTv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTr64compare.Fragment), R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
+ (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
+}
+
+defm I64GT: CompareGreaterThan64;
+
+def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
+//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64GTv2i64.Fragment>;
+
+// i64 setle:
+def : I64SETCCNegCond<setle, I64GTr64>;
+def : I64SELECTNegCond<setle, I64GTr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// i64 setge/setlt:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def CGEr64compare:
+ CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment,
+ CGTr64eq.Fragment)), 0xb)>;
+
+def CGEv2i64compare:
+ CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
+ CGTv2i64eq.Fragment)), 0xf)>;
+
+multiclass CompareGreaterEqual64 {
+ // Plain old comparison, converts back to i32 scalar
+ def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
+ def v2i64: CodeFrag<CGEv2i64compare.Fragment>;
+
+ // SELB mask from FSM:
+ def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
+ def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
+}
+
+defm I64GE: CompareGreaterEqual64;
+
+def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
+def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
+ I64GEv2i64.Fragment>;
+
+// i64 setlt:
+def : I64SETCCNegCond<setlt, I64GEr64>;
+def : I64SELECTNegCond<setlt, I64GEr64>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 add
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_add_cg<dag lhs, dag rhs>:
+ CodeFrag<(CGv4i32 lhs, rhs)>;
+
+class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
+ CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;
+
+class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
+ v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;
+
+def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_add<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 subtraction
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;
+
+class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
+ CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;
+
+def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS
+ v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_sub<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB),
+ v2i64_sub_bg<(v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)>.Fragment,
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v2i64, i64 multiply
+//
+// Note: i64 multiply is simply the vector->scalar conversion of the
+// full-on v2i64 multiply, since the entire vector has to be manipulated
+// anyway.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
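+// The SPU only has 16 x 16 -> 32 bit multiplies (mpy, mpyu, mpyh, mpyhhu),
+// so the 64 x 64 -> 64 bit product is assembled from halfword partial
+// products: the v2i64_mul_a*/_b* fragments mask and byte-shift the operands
+// to line up each 16-bit slice, v2i64_highprod accumulates the high-order
+// partial products, v2i64_lowsum adds the low-order terms, and carries
+// between 32-bit words are propagated with the $rCGmask shuffle mask using
+// the same carry-generate/add-extended sequence as v2i64 addition.
+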
+class v2i64_mul_ahi64<dag rA> :
+ CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_bhi64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;
+
+class v2i64_mul_alo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_blo64<dag rB> :
+ CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;
+
+class v2i64_mul_ashlq2<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;
+
+class v2i64_mul_ashlq4<dag rA>:
+ CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;
+
+class v2i64_mul_bshlq2<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;
+
+class v2i64_mul_bshlq4<dag rB> :
+ CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
+
+class v2i64_highprod<dag rA, dag rB>:
+ CodeFrag<(Av4i32
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3
+ v2i64_mul_ahi64<rA>.Fragment),
+ (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3
+ v2i64_mul_bshlq4<rB>.Fragment)),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
+ v2i64_mul_ashlq4<rA>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
+ v2i64_mul_bhi64<rB>.Fragment),
+ (Av4i32
+ (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment),
+ (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment))))))>;
+
+class v2i64_mul_a3_b3<dag rA, dag rB>:
+ CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_blo64<rB>.Fragment)>;
+
+class v2i64_mul_a2_b3<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
+ v2i64_mul_bshlq2<rB>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_mul_a3_b2<dag rA, dag rB>:
+ CodeFrag<(SELBv4i32 (SHLQBYIv4i32
+ (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
+ v2i64_mul_ashlq2<rA>.Fragment), 0x2),
+ (ILv4i32 0),
+ (FSMBIv4i32 0xc3c3))>;
+
+class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
+ v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
+ v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;
+
+class v2i64_mul<dag rA, dag rB, dag rCGmask>:
+ v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
+ (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
+ (ILv4i32 0),
+ (FSMBIv4i32 0x0f0f)),
+ rCGmask>;
+
+def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
+ (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
+ (COPY_TO_REGCLASS R64C:$rB, VECREG),
+ (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;
+
+def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)),
+ v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
+ (v4i32 VECREG:$rCGmask)>.Fragment>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f64 comparisons
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// selb instruction definition for i64. Note that the selection mask is
+// a vector, produced by various forms of FSM:
+def SELBf64_cond:
+ SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
+ [(set R64FP:$rT,
+ (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
new file mode 100644
index 000000000000..14021fef05d9
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp
@@ -0,0 +1,333 @@
+//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to Cell SPU assembly language. This printer
+// is the output mechanism used by `llc'.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asmprinter"
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+ class SPUAsmPrinter : public AsmPrinter {
+ public:
+ explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) :
+ AsmPrinter(TM, Streamer) {}
+
+ virtual const char *getPassName() const {
+ return "STI CBEA SPU Assembly Printer";
+ }
+
+ /// printInstruction - This method is automatically generated by tablegen
+ /// from the instruction set description.
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS);
+ static const char *getRegisterName(unsigned RegNo);
+
+
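+    // EmitInstruction formats each MachineInstr with the TableGen'erated
+    // printInstruction() and emits it as raw assembly text; this printer
+    // does not lower to MCInst.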
+ void EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+ printInstruction(MI, OS);
+ OutStreamer.EmitRawText(OS.str());
+ }
+ void printOp(const MachineOperand &MO, raw_ostream &OS);
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ if (MO.isReg()) {
+ O << getRegisterName(MO.getReg());
+ } else if (MO.isImm()) {
+ O << MO.getImm();
+ } else {
+ printOp(MO, O);
+ }
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+
+
+ void
+ printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value < (1 << 7) && "Invalid u7 argument");
+ O << value;
+ }
+
+ void
+ printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ char value = MI->getOperand(OpNo).getImm();
+ O << (int) value;
+ O << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (short) MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
+ }
+
+ void
+ printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // When used as the base register, r0 reads constant zero rather than
+ // the value contained in the register. For this reason, the darwin
+ // assembler requires that we print r0 as 0 (no r) when used as the base.
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ O << getRegisterName(MO.getReg()) << ", ";
+ printOperand(MI, OpNo+1, O);
+ }
+
+ void
+ printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ unsigned int value = MI->getOperand(OpNo).getImm();
+ assert(value <= (1 << 18) - 1 && "Invalid u18 argument");
+ O << value;
+ }
+
+ void
+ printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value >= -(1 << 9) && value <= (1 << 9) - 1)
+ && "Invalid s10 argument");
+ O << value;
+ }
+
+ void
+ printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16)
+ >> 16);
+ assert((value <= (1 << 10) - 1) && "Invalid u10 argument");
+ O << value;
+ }
+
+ void
+ printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ assert(MI->getOperand(OpNo).isImm() &&
+ "printDFormAddr first operand is not immediate");
+ int64_t value = int64_t(MI->getOperand(OpNo).getImm());
+ int16_t value16 = int16_t(value);
+ assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1)
+ && "Invalid dform s10 offset argument");
+ O << (value16 & ~0xf) << "(";
+ printOperand(MI, OpNo+1, O);
+ O << ")";
+ }
+
+ void
+ printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O)
+ {
+ /* Note: operand 1 is an offset or symbol name. */
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ if (MI->getOperand(OpNo+1).isImm()) {
+ int displ = int(MI->getOperand(OpNo+1).getImm());
+ if (displ > 0)
+ O << "+" << displ;
+ else if (displ < 0)
+ O << displ;
+ }
+ }
+ }
+
+ void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ // Used to generate a ".-<target>", but it turns out that the assembler
+ // really wants the target.
+ //
+ // N.B.: This operand is used for call targets. Branch hints are another
+ // animal entirely.
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@h";
+ }
+ }
+
+ void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ printS16ImmOperand(MI, OpNo, O);
+ } else {
+ printOp(MI->getOperand(OpNo), O);
+ O << "@l";
+ }
+ }
+
+ /// Print local store address
+ void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) {
+ printOp(MI->getOperand(OpNo), O);
+ }
+
+ void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).isImm()) {
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value < 16)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ } else {
+ llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ }
+ }
+
+ void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){
+ assert(MI->getOperand(OpNo).isImm() &&
+ "Invalid/non-immediate rotate amount in printRotateNeg7Imm");
+ int value = (int) MI->getOperand(OpNo).getImm();
+ assert((value >= 0 && value <= 32)
+ && "Invalid negated immediate rotate 7-bit argument");
+ O << -value;
+ }
+ };
+} // end of anonymous namespace
+
+// Include the auto-generated portion of the assembly writer
+#include "SPUGenAsmWriter.inc"
+
+void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ report_fatal_error("printOp() does not handle immediate values");
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ return;
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ return;
+ case MachineOperand::MO_ExternalSymbol:
+ // Computing the address of an external symbol, not calling it.
+ if (TM.getRelocationModel() != Reloc::Static) {
+ O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName()
+ << "$non_lazy_ptr";
+ return;
+ }
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ return;
+ case MachineOperand::MO_GlobalAddress:
+ // External or weakly linked global variables need non-lazily-resolved
+ // stubs
+ if (TM.getRelocationModel() != Reloc::Static) {
+ const GlobalValue *GV = MO.getGlobal();
+ if (((GV->isDeclaration() || GV->hasWeakLinkage() ||
+ GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) {
+ O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+ return;
+ }
+ }
+ O << *Mang->getSymbol(MO.getGlobal());
+ return;
+ case MachineOperand::MO_MCSymbol:
+ O << *(MO.getMCSymbol());
+ return;
+ default:
+ O << "<unknown operand type: " << MO.getType() << ">";
+ return;
+ }
+}
+
+/// PrintAsmOperand - Print out an operand for an inline asm expression.
+///
+bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0) return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default: return true; // Unknown modifier.
+ case 'L': // Write second word of DImode reference.
+ // Verify that this operand has two consecutive registers.
+ if (!MI->getOperand(OpNo).isReg() ||
+ OpNo+1 == MI->getNumOperands() ||
+ !MI->getOperand(OpNo+1).isReg())
+ return true;
+ ++OpNo; // Return the high-part.
+ break;
+ }
+ }
+
+ printOperand(MI, OpNo, O);
+ return false;
+}
+
+bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+ printMemRegReg(MI, OpNo, O);
+ return false;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUAsmPrinter() {
+ RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
new file mode 100644
index 000000000000..9f9692bf67fe
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td
@@ -0,0 +1,57 @@
+//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the STI Cell SPU architecture.
+//
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+  : CCIf<!strconcat("State.getTarget().getSubtarget<SPUSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention
+//===----------------------------------------------------------------------===//
+
+// Return-value convention for Cell SPU: return values are passed in R3-R74.
+def RetCC_SPU : CallingConv<[
+ CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// CellSPU Argument Calling Conventions
+//===----------------------------------------------------------------------===//
+def CCC_SPU : CallingConv<[
+ CCIfType<[i8, i16, i32, i64, i128, f32, f64,
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74]>>,
+ // Integer/FP values get stored in stack slots that are 8 bytes in size and
+ // 8-byte aligned if there are no more registers to hold them.
+ CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>
+]>;
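+
+// For example, under CCC_SPU an i32 argument takes the next free register in
+// the R3-R74 sequence; once those are exhausted it spills to an 8-byte,
+// 8-byte-aligned stack slot, while vector arguments spill to 16-byte,
+// 16-byte-aligned slots.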
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
new file mode 100644
index 000000000000..fac806e1b0ea
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -0,0 +1,256 @@
+//===-- SPUFrameLowering.cpp - Cell SPU Frame Lowering Implementation ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Frame lowering (prologue/epilogue insertion) for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUFrameLowering.h"
+#include "SPU.h"
+#include "SPUInstrBuilder.h"
+#include "SPUInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// SPUFrameLowering:
+//===----------------------------------------------------------------------===//
+
+SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0),
+ Subtarget(sti) {
+ LR[0].first = SPU::R0;
+ LR[0].second = 16;
+}
+
+
+//--------------------------------------------------------------------------
+// hasFP - Return true if the specified function actually has a dedicated frame
+// pointer register. This is true if the function needs a frame pointer and has
+// a non-zero stack size.
+bool SPUFrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ return MFI->getStackSize() &&
+ (MF.getTarget().Options.DisableFramePointerElim(MF) ||
+ MFI->hasVarSizedObjects());
+}
+
+
+/// determineFrameLayout - Determine the size of the frame and maximum call
+/// frame size.
+void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Get the number of bytes to allocate from the FrameInfo
+ unsigned FrameSize = MFI->getStackSize();
+
+ // Get the alignments provided by the target, and the maximum alignment
+ // (if any) of the fixed frame objects.
+ unsigned TargetAlign = getStackAlignment();
+ unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment());
+ assert(isPowerOf2_32(Align) && "Alignment is not power of 2");
+ unsigned AlignMask = Align - 1;
+
+ // Get the maximum call frame size of all the calls.
+ unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+
+ // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
+ // that allocations will be aligned.
+ if (MFI->hasVarSizedObjects())
+ maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
+
+ // Update maximum call frame size.
+ MFI->setMaxCallFrameSize(maxCallFrameSize);
+
+ // Include call frame size in total.
+ FrameSize += maxCallFrameSize;
+
+ // Make sure the frame is aligned.
+ FrameSize = (FrameSize + AlignMask) & ~AlignMask;
+
+ // Update frame info.
+ MFI->setStackSize(FrameSize);
+}
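+
+// For example, with the default 16-byte stack alignment, a function with 40
+// bytes of locals and a 32-byte maximum call frame ends up with
+// FrameSize = (40 + 32 + 15) & ~15 = 80 bytes.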
+
+void SPUFrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ MachineModuleInfo &MMI = MF.getMMI();
+ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+
+ // Prepare for debug frame info.
+ bool hasDebugInfo = MMI.hasDebugInfo();
+ MCSymbol *FrameLabel = 0;
+
+ // Move MBBI back to the beginning of the function.
+ MBBI = MBB.begin();
+
+ // Work out frame sizes.
+ determineFrameLayout(MF);
+ int FrameSize = MFI->getStackSize();
+
+ assert((FrameSize & 0xf) == 0
+ && "SPURegisterInfo::emitPrologue: FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = -(FrameSize + SPUFrameLowering::minStackSize());
+ if (hasDebugInfo) {
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel);
+ }
+
+ // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp)
+ // for the ABI
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
+ .addReg(SPU::R1);
+ if (isInt<10>(FrameSize)) {
+ // Spill $sp to adjusted $sp
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
+ .addReg(SPU::R1);
+      // Adjust $sp by the required amount
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (isInt<16>(FrameSize)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(-16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2)
+ .addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+
+ if (hasDebugInfo) {
+ std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+
+ // Show update of SP.
+ MachineLocation SPDst(MachineLocation::VirtualFP);
+ MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize);
+ Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == SPU::R0) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc));
+ }
+
+ // Mark effective beginning of when frame pointer is ready.
+ MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol();
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel);
+
+ MachineLocation FPDst(SPU::R1);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ }
+ }
+}
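+
+// Schematically, for a frame small enough to fit a signed 10-bit immediate,
+// the prologue above amounts to:
+//   stqd $lr, 16($sp)     (spill the link register)
+//   stqd $sp, -N($sp)     (save the back chain)
+//   ai   $sp, $sp, -N     (allocate the frame)
+// where N is the aligned frame size plus the 32-byte minimum stack area.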
+
+void SPUFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ const SPUInstrInfo &TII =
+ *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo());
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ int FrameSize = MFI->getStackSize();
+ int LinkSlotOffset = SPUFrameLowering::stackSlotSize();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ assert(MBBI->getOpcode() == SPU::RET &&
+ "Can only insert epilog into returning blocks");
+ assert((FrameSize & 0xf) == 0 && "FrameSize not aligned");
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->adjustsStack()) {
+ FrameSize = FrameSize + SPUFrameLowering::minStackSize();
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
+ // Reload $lr, adjust $sp by required amount
+ // Note: We do this to slightly improve dual issue -- not by much, but it
+ // is an opportunity for dual issue.
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(FrameSize + LinkSlotOffset)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1)
+ .addReg(SPU::R1)
+ .addImm(FrameSize);
+ } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
+ // $r2 to adjust $sp:
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
+ .addImm(FrameSize);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
+ .addReg(SPU::R1)
+ .addReg(SPU::R2);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
+ .addImm(16)
+ .addReg(SPU::R1);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
+ addReg(SPU::R2)
+ .addImm(16);
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2)
+ .addReg(SPU::R2)
+ .addReg(SPU::R1);
+ } else {
+ report_fatal_error("Unhandled frame size: " + Twine(FrameSize));
+ }
+ }
+}
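+
+// The epilogue mirrors the prologue: in the small-frame case it reloads the
+// link register from N+16($sp) and then executes ai $sp, $sp, N to release
+// the frame before the RET.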
+
+void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const{
+ // Mark LR and SP unused, since the prolog spills them to stack and
+ // we don't want anyone else to spill them for us.
+ //
+ // Also, unless R2 is really used someday, don't spill it automatically.
+ MF.getRegInfo().setPhysRegUnused(SPU::R0);
+ MF.getRegInfo().setPhysRegUnused(SPU::R1);
+ MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
new file mode 100644
index 000000000000..11c52818dd9c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h
@@ -0,0 +1,80 @@
+//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains CellSPU frame information that doesn't fit anywhere else
+// cleanly...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_FRAMEINFO_H
+#define SPU_FRAMEINFO_H
+
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class SPUSubtarget;
+
+ class SPUFrameLowering: public TargetFrameLowering {
+ const SPUSubtarget &Subtarget;
+ std::pair<unsigned, int> LR[1];
+
+ public:
+ SPUFrameLowering(const SPUSubtarget &sti);
+
+    //! Determine the frame's layout
+ void determineFrameLayout(MachineFunction &MF) const;
+
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+    //! Predicate: Target has a dedicated frame pointer
+ bool hasFP(const MachineFunction &MF) const;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS = NULL) const;
+
+ //! Return a function's saved spill slots
+ /*!
+      For CellSPU, the only saved spill slot is the link register.
+ */
+ const std::pair<unsigned, int> *
+ getCalleeSaveSpillSlots(unsigned &NumEntries) const;
+
+ //! Stack slot size (16 bytes)
+ static int stackSlotSize() {
+ return 16;
+ }
+ //! Maximum frame offset representable by a signed 10-bit integer
+ /*!
+ This is the maximum frame offset that can be expressed as a 10-bit
+ integer, used in D-form addresses.
+ */
+ static int maxFrameOffset() {
+ return ((1 << 9) - 1) * stackSlotSize();
+ }
+ //! Minimum frame offset representable by a signed 10-bit integer
+ static int minFrameOffset() {
+ return -(1 << 9) * stackSlotSize();
+ }
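+    // With the 16-byte slot size these evaluate to maxFrameOffset() == 8176
+    // and minFrameOffset() == -8192, i.e. the signed 10-bit D-form immediate
+    // range scaled by 16.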
+ //! Minimum frame size (enough to spill LR + SP)
+ static int minStackSize() {
+ return (2 * stackSlotSize());
+ }
+ //! Convert frame index to stack offset
+ static int FItoStackOffset(int frame_index) {
+ return frame_index * stackSlotSize();
+ }
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
new file mode 100644
index 000000000000..403d7ef1fd9e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp
@@ -0,0 +1,141 @@
+//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements hazard recognizers for scheduling on Cell SPU
+// processors.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sched"
+
+#include "SPUHazardRecognizers.h"
+#include "SPU.h"
+#include "SPUInstrInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Cell SPU hazard recognizer
+//
+// This is the pipeline hazard recognizer for the Cell SPU processor. It does
+// very little right now.
+//===----------------------------------------------------------------------===//
+
+SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) :
+ TII(tii),
+ EvenOdd(0)
+{
+}
+
+/// Return the pipeline hazard type encountered or generated by this
+/// instruction. Currently returns NoHazard.
+///
+/// \return NoHazard
+ScheduleHazardRecognizer::HazardType
+SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls)
+{
+ // Initial thoughts on how to do this, but this code cannot work unless the
+ // function's prolog and epilog code are also being scheduled so that we can
+ // accurately determine which pipeline is being scheduled.
+#if 0
+ assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead");
+
+ const SDNode *Node = SU->getNode()->getFlaggedMachineNode();
+ ScheduleHazardRecognizer::HazardType retval = NoHazard;
+ bool mustBeOdd = false;
+
+ switch (Node->getOpcode()) {
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16:
+ case SPU::LQAv16i8:
+ case SPU::LQAv8i16:
+ case SPU::LQAv4i32:
+ case SPU::LQAv4f32:
+ case SPU::LQAv2f64:
+ case SPU::LQAr128:
+ case SPU::LQAr64:
+ case SPU::LQAr32:
+ case SPU::LQXv4i32:
+ case SPU::LQXr128:
+ case SPU::LQXr64:
+ case SPU::LQXr32:
+ case SPU::LQXr16:
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8:
+ case SPU::STQAv16i8:
+ case SPU::STQAv8i16:
+ case SPU::STQAv4i32:
+ case SPU::STQAv4f32:
+ case SPU::STQAv2f64:
+ case SPU::STQAr128:
+ case SPU::STQAr64:
+ case SPU::STQAr32:
+ case SPU::STQAr16:
+ case SPU::STQAr8:
+ case SPU::STQXv16i8:
+ case SPU::STQXv8i16:
+ case SPU::STQXv4i32:
+ case SPU::STQXv4f32:
+ case SPU::STQXv2f64:
+ case SPU::STQXr128:
+ case SPU::STQXr64:
+ case SPU::STQXr32:
+ case SPU::STQXr16:
+ case SPU::STQXr8:
+ case SPU::RET:
+ mustBeOdd = true;
+ break;
+ default:
+ // Assume that this instruction can be on the even pipe
+ break;
+ }
+
+ if (mustBeOdd && !EvenOdd)
+ retval = Hazard;
+
+ DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard "
+ << retval << "\n");
+ EvenOdd ^= 1;
+ return retval;
+#else
+ return NoHazard;
+#endif
+}
+
+void SPUHazardRecognizer::EmitInstruction(SUnit *SU)
+{
+}
+
+void SPUHazardRecognizer::AdvanceCycle()
+{
+ DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n");
+}
+
+void SPUHazardRecognizer::EmitNoop()
+{
+ AdvanceCycle();
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
new file mode 100644
index 000000000000..675632cc7f13
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h
@@ -0,0 +1,41 @@
+//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling on the Cell SPU
+// processor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUHAZRECS_H
+#define SPUHAZRECS_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class TargetInstrInfo;
+
+/// SPUHazardRecognizer
+class SPUHazardRecognizer : public ScheduleHazardRecognizer
+{
+private:
+ const TargetInstrInfo &TII;
+ int EvenOdd;
+
+public:
+ SPUHazardRecognizer(const TargetInstrInfo &TII);
+ virtual HazardType getHazardType(SUnit *SU, int Stalls);
+ virtual void EmitInstruction(SUnit *SU);
+ virtual void AdvanceCycle();
+ virtual void EmitNoop();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
new file mode 100644
index 000000000000..c27caeae7d45
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -0,0 +1,1193 @@
+//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a pattern matching instruction selector for the Cell SPU,
+// converting from a legalized dag to a SPU-target dag.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "SPUFrameLowering.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Constants.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
+ bool
+ isI32IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
+ bool
+ isI32IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
+ bool
+ isI16IntS10Immediate(ConstantSDNode *CN)
+ {
+ return isInt<10>(CN->getSExtValue());
+ }
+
+ //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values
+ bool
+ isI16IntU10Immediate(ConstantSDNode *CN)
+ {
+ return isUInt<10>((short) CN->getZExtValue());
+ }
+
+ //! ConstantSDNode predicate for signed 16-bit values
+ /*!
+ \arg CN The constant SelectionDAG node holding the value
+ \arg Imm The returned 16-bit value, if returning true
+
+ This predicate tests the value in \a CN to see whether it can be
+ represented as a 16-bit, sign-extended quantity. Returns true if
+ this is the case.
+ */
+ bool
+ isIntS16Immediate(ConstantSDNode *CN, short &Imm)
+ {
+ EVT vt = CN->getValueType(0);
+ Imm = (short) CN->getZExtValue();
+ if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) {
+ return true;
+ } else if (vt == MVT::i32) {
+ int32_t i_val = (int32_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ } else {
+ int64_t i_val = (int64_t) CN->getZExtValue();
+ short s_val = (short) i_val;
+ return i_val == s_val;
+ }
+ }
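+
+  // For example, an i32 constant of -32768 (0xffff8000) passes this test with
+  // Imm == -32768, while 98304 (0x18000) fails because truncating it to 16
+  // bits would change its value.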
+
+ //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext.
+ static bool
+ isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm)
+ {
+ EVT vt = FPN->getValueType(0);
+ if (vt == MVT::f32) {
+ int val = FloatToBits(FPN->getValueAPF().convertToFloat());
+ int sval = (int) ((val << 16) >> 16);
+ Imm = (short) val;
+ return val == sval;
+ }
+
+ return false;
+ }
+
+ //! Generate the carry-generate shuffle mask.
+ SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+    // Create the shuffle mask for "rotating" the carry up one register slot
+    // once the carry is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
+
+ //! Generate the borrow-generate shuffle mask
+ SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) {
+ SmallVector<SDValue, 16 > ShufBytes;
+
+ // Create the shuffle mask for "rotating" the borrow up one register slot
+ // once the borrow is generated.
+ ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
+ ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size());
+ }
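+
+  // In these SHUFB control masks each byte selects a source byte: 0x04050607
+  // and 0x0c0d0e0f copy the carry/borrow words from word slots 1 and 3 into
+  // slots 0 and 2, 0x80 bytes produce constant 0x00, and 0xc0 bytes produce
+  // constant 0xff, following the SPU shufb special-byte encoding.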
+
+ //===------------------------------------------------------------------===//
+ /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine
+ /// instructions for SelectionDAG operations.
+ ///
+ class SPUDAGToDAGISel :
+ public SelectionDAGISel
+ {
+ const SPUTargetMachine &TM;
+ const SPUTargetLowering &SPUtli;
+ unsigned GlobalBaseReg;
+
+ public:
+ explicit SPUDAGToDAGISel(SPUTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm),
+ SPUtli(*tm.getTargetLowering())
+ { }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ // Make sure we re-emit a set of the global base reg if necessary
+ GlobalBaseReg = 0;
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type
+ /// i32.
+ inline SDValue getI32Imm(uint32_t Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
+ /// getSmallIPtrImm - Return a target constant of pointer type.
+ inline SDValue getSmallIPtrImm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy());
+ }
+
+ SDNode *emitBuildVector(SDNode *bvNode) {
+ EVT vecVT = bvNode->getValueType(0);
+ DebugLoc dl = bvNode->getDebugLoc();
+
+ // Check to see if this vector can be represented as a CellSPU immediate
+ // constant by invoking all of the instruction selection predicates:
+ if (((vecVT == MVT::v8i16) &&
+ (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) ||
+ ((vecVT == MVT::v4i32) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) ||
+ (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) ||
+ ((vecVT == MVT::v2i64) &&
+ ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
+ HandleSDNode Dummy(SDValue(bvNode, 0));
+ if (SDNode *N = Select(bvNode))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ // No, need to emit a constant pool spill:
+ std::vector<Constant*> CV;
+
+ for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
+ ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ }
+
+ const Constant *CP = ConstantVector::get(CV);
+ SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy());
+ unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+ SDValue CGPoolOffset =
+ SPU::LowerConstantPool(CPIdx, *CurDAG, TM);
+
+ HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ MachinePointerInfo::getConstantPool(),
+ false, false, false, Alignment));
+ CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ }
+
+ /// Select - Convert the specified operand from a target-independent to a
+ /// target-specific node if it hasn't already been changed.
+ SDNode *Select(SDNode *N);
+
+ //! Emit the instruction sequence for i64 shl
+ SDNode *SelectSHLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 srl
+ SDNode *SelectSRLi64(SDNode *N, EVT OpVT);
+
+ //! Emit the instruction sequence for i64 sra
+ SDNode *SelectSRAi64(SDNode *N, EVT OpVT);
+
+ //! Emit the necessary sequence for loading i64 constants:
+ SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl);
+
+ //! Alternate instruction emit sequence for loading i64 constants
+ SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl);
+
+ //! Returns true if the address N is an A-form (local store) address
+ bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ //! D-form address predicate
+ bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// Alternate D-form address using i7 offset predicate
+ bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base);
+
+ /// D-form address selection workhorse
+ bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base, int minOffset, int maxOffset);
+
+ //! Address predicate if N can be expressed as an indexed [r+r] operation.
+ bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index);
+
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ SDValue Op0, Op1;
+ switch (ConstraintCode) {
+ default: return true;
+ case 'm': // memory
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1))
+ SelectXFormAddr(Op.getNode(), Op, Op0, Op1);
+ break;
+ case 'o': // offsetable
+ if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1)
+ && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) {
+ Op0 = Op;
+ Op1 = getSmallIPtrImm(0);
+ }
+ break;
+ case 'v': // not offsetable
+#if 1
+ llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled.");
+#else
+ SelectAddrIdxOnly(Op, Op, Op0, Op1);
+ break;
+#endif
+ }
+
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
+ return false;
+ }
+
+ virtual const char *getPassName() const {
+ return "Cell SPU DAG->DAG Pattern Instruction Selection";
+ }
+
+ private:
+ SDValue getRC( MVT );
+
+ // Include the pieces autogenerated from the target description.
+#include "SPUGenDAGISel.inc"
+ };
+}
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address to be tested
+ \arg Base The base address
+ \arg Index The base address index
+ */
+bool
+SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ // These match the addr256k operand type:
+ EVT OffsVT = MVT::i16;
+ SDValue Zero = CurDAG->getTargetConstant(0, OffsVT);
+ int64_t val;
+
+ switch (N.getOpcode()) {
+ case ISD::Constant:
+ val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue();
+ Base = CurDAG->getTargetConstant( val , MVT::i32);
+ Index = Zero;
+ return true;
+ case ISD::ConstantPool:
+ case ISD::GlobalAddress:
+ report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered.");
+ /*NOTREACHED*/
+
+ case ISD::TargetConstant:
+ case ISD::TargetGlobalAddress:
+ case ISD::TargetJumpTable:
+ report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global "
+ "not wrapped as A-form address.");
+ /*NOTREACHED*/
+
+ case SPUISD::AFormAddr:
+    // Just load from memory if there's only a single use of the location;
+    // otherwise, this will get handled below with D-form offset addresses.
+ if (N.hasOneUse()) {
+ SDValue Op0 = N.getOperand(0);
+ switch (Op0.getOpcode()) {
+ case ISD::TargetConstantPool:
+ case ISD::TargetJumpTable:
+ Base = Op0;
+ Index = Zero;
+ return true;
+
+ case ISD::TargetGlobalAddress: {
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0);
+ const GlobalValue *GV = GSDN->getGlobal();
+ if (GV->getAlignment() == 16) {
+ Base = Op0;
+ Index = Zero;
+ return true;
+ }
+ break;
+ }
+ }
+ }
+ break;
+ }
+ return false;
+}
+
+bool
+SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp,
+ SDValue &Base) {
+ const int minDForm2Offset = -(1 << 7);
+ const int maxDForm2Offset = (1 << 7) - 1;
+ return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset,
+ maxDForm2Offset);
+}
+
+/*!
+ \arg Op The ISD instruction (ignored)
+ \arg N The address to be tested
+ \arg Base Base address register/pointer
+ \arg Index Base address index
+
+  Examine whether the input address can be expressed as a base register plus
+  a signed 10-bit displacement, [r+I10] (D-form address).
+
+ \return true if \a N is a D-form address with \a Base and \a Index set
+ to non-empty SDValue instances.
+*/
+bool
+SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ return DFormAddressPredicate(Op, N, Base, Index,
+ SPUFrameLowering::minFrameOffset(),
+ SPUFrameLowering::maxFrameOffset());
+}
+
+bool
+SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index, int minOffset,
+ int maxOffset) {
+ unsigned Opc = N.getOpcode();
+ EVT PtrTy = SPUtli.getPointerTy();
+
+ if (Opc == ISD::FrameIndex) {
+ // Stack frame index must be less than 512 (divided by 16):
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = "
+ << FI << "\n");
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (Opc == ISD::ADD) {
+ // Generated by getelementptr
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo)
+ || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) {
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (Op1.getOpcode() == ISD::Constant
+ || Op1.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op0.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op0;
+ return true;
+ }
+ } else if (Op0.getOpcode() == ISD::Constant
+ || Op0.getOpcode() == ISD::TargetConstant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ int32_t offset = int32_t(CN->getSExtValue());
+
+ if (Op1.getOpcode() == ISD::FrameIndex) {
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1);
+ int FI = int(FIN->getIndex());
+ DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset
+ << " frame index = " << FI << "\n");
+
+ if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = CurDAG->getTargetFrameIndex(FI, PtrTy);
+ return true;
+ }
+ } else if (offset > minOffset && offset < maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = Op1;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Indirect with constant offset -> D-Form address
+ const SDValue Op0 = N.getOperand(0);
+ const SDValue Op1 = N.getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::Hi
+ && Op1.getOpcode() == SPUISD::Lo) {
+ // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0))
+ Base = CurDAG->getTargetConstant(0, PtrTy);
+ Index = N;
+ return true;
+ } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) {
+ int32_t offset = 0;
+ SDValue idxOp;
+
+ if (isa<ConstantSDNode>(Op1)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op0;
+ } else if (isa<ConstantSDNode>(Op0)) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0);
+ offset = int32_t(CN->getSExtValue());
+ idxOp = Op1;
+ }
+
+ if (offset >= minOffset && offset <= maxOffset) {
+ Base = CurDAG->getTargetConstant(offset, PtrTy);
+ Index = idxOp;
+ return true;
+ }
+ }
+ } else if (Opc == SPUISD::AFormAddr) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == SPUISD::LDRESULT) {
+ Base = CurDAG->getTargetConstant(0, N.getValueType());
+ Index = N;
+ return true;
+ } else if (Opc == ISD::Register
+ ||Opc == ISD::CopyFromReg
+ ||Opc == ISD::UNDEF
+ ||Opc == ISD::Constant) {
+ unsigned OpOpc = Op->getOpcode();
+
+ if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
+ // Direct load/store without getelementptr
+ SDValue Offs;
+
+ Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
+
+ if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) {
+ if (Offs.getOpcode() == ISD::UNDEF)
+ Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
+
+ Base = Offs;
+ Index = N;
+ return true;
+ }
+ } else {
+ /* If otherwise unadorned, default to D-form address with 0 offset: */
+ if (Opc == ISD::CopyFromReg) {
+ Index = N.getOperand(1);
+ } else {
+ Index = N;
+ }
+
+ Base = CurDAG->getTargetConstant(0, Index.getValueType());
+ return true;
+ }
+ }
+
+ return false;
+}
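+
+// For example, a getelementptr-generated (add (FrameIndex 1), 32) is accepted
+// here with Base = 32 and Index = the target frame index, provided the frame
+// index, scaled to a stack offset, stays below maxOffset.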
+
+/*!
+ \arg Op The ISD instruction operand
+ \arg N The address operand
+ \arg Base The base pointer operand
+ \arg Index The offset/index operand
+
+ If the address \a N can be expressed as an A-form or D-form address, returns
+ false. Otherwise, creates two operands, Base and Index that will become the
+ (r)(r) X-form address.
+*/
+bool
+SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Index) {
+ if (!SelectAFormAddr(Op, N, Base, Index)
+ && !SelectDFormAddr(Op, N, Base, Index)) {
+    // If the address is neither A-form nor D-form, punt and use an X-form
+ // address:
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
+
+/*!
+ Utility function to use with COPY_TO_REGCLASS instructions. Returns an
+ SDValue to be used as the last parameter of a
+ CurDAG->getMachineNode(COPY_TO_REGCLASS, ...) function call.
+ \arg VT the value type for which we want a register class
+*/
+SDValue SPUDAGToDAGISel::getRC( MVT VT ) {
+ switch( VT.SimpleTy ) {
+ case MVT::i8:
+ return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32);
+ case MVT::i16:
+ return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32);
+ case MVT::i32:
+ return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32);
+ case MVT::f32:
+ return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32);
+ case MVT::i64:
+ return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32);
+ case MVT::i128:
+ return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32);
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32);
+ default:
+ assert( false && "add a new case here" );
+ return SDValue();
+ }
+}
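+
+// Typical use, as in the i64 shift selectors below: wrap an operand in the
+// vector register class before operating on it as a v2i64 quantity, e.g.
+//   CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+//                          Op0, getRC(MVT::v2i64));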
+
+//! Convert the operand from a target-independent to a target-specific node
+/*!
+ */
+SDNode *
+SPUDAGToDAGISel::Select(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ int n_ops = -1;
+ unsigned NewOpc = 0;
+ EVT OpVT = N->getValueType(0);
+ SDValue Ops[8];
+ DebugLoc dl = N->getDebugLoc();
+
+ if (N->isMachineOpcode())
+ return NULL; // Already selected.
+
+ if (Opc == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
+ SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0));
+
+ if (FI < 128) {
+ NewOpc = SPU::AIr32;
+ Ops[0] = TFI;
+ Ops[1] = Imm0;
+ n_ops = 2;
+ } else {
+ NewOpc = SPU::Ar32;
+ Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0));
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl,
+ N->getValueType(0), TFI),
+ 0);
+ n_ops = 2;
+ }
+ } else if (Opc == ISD::Constant && OpVT == MVT::i64) {
+ // Catch the i64 constants that end up here. Note: The backend doesn't
+ // attempt to legalize the constant (it's useless because DAGCombiner
+ // will insert 64-bit constants and we can't stop it).
+ return SelectI64Constant(N, OpVT, N->getDebugLoc());
+ } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND)
+ && OpVT == MVT::i64) {
+ SDValue Op0 = N->getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+ EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ Op0VT, (128 / Op0VT.getSizeInBits()));
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue shufMask;
+
+ switch (Op0VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT");
+ /*NOTREACHED*/
+ case MVT::i32:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x00010203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800203, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x80808003, MVT::i32),
+ CurDAG->getConstant(0x80808080, MVT::i32),
+ CurDAG->getConstant(0x8080800b, MVT::i32));
+ break;
+ }
+
+ SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
+
+ HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+ Op0VecVT, Op0));
+
+ SDValue PromScalar;
+ if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
+ PromScalar = SDValue(N, 0);
+ else
+ PromScalar = PromoteScalar.getValue();
+
+ SDValue zextShuffle =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ PromScalar, PromScalar,
+ SDValue(shufMaskLoad, 0));
+
+ HandleSDNode Dummy2(zextShuffle);
+ if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
+ zextShuffle = SDValue(N, 0);
+ else
+ zextShuffle = Dummy2.getValue();
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ SelectCode(Dummy.getValue().getNode());
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
+ SDNode *CGLoad =
+ emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
+
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
+ } else if (Opc == ISD::TRUNCATE) {
+ SDValue Op0 = N->getOperand(0);
+ if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
+ && OpVT == MVT::i32
+ && Op0.getValueType() == MVT::i64) {
+      // Catch (truncate:i32 ([sra|srl]:i64 arg, c)), where c >= 32
+ //
+ // Take advantage of the fact that the upper 32 bits are in the
+ // i32 preferred slot and avoid shuffle gymnastics:
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+ if (CN != 0) {
+ unsigned shift_amt = unsigned(CN->getZExtValue());
+
+ if (shift_amt >= 32) {
+ SDNode *hi32 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ Op0.getOperand(0), getRC(MVT::i32));
+
+ shift_amt -= 32;
+ if (shift_amt > 0) {
+ // Take care of the additional shift, if present:
+ SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32);
+ unsigned Opc = SPU::ROTMAIr32_i32;
+
+ if (Op0.getOpcode() == ISD::SRL)
+ Opc = SPU::ROTMr32;
+
+ hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0),
+ shift);
+ }
+
+ return hi32;
+ }
+ }
+ }
+ } else if (Opc == ISD::SHL) {
+ if (OpVT == MVT::i64)
+ return SelectSHLi64(N, OpVT);
+ } else if (Opc == ISD::SRL) {
+ if (OpVT == MVT::i64)
+ return SelectSRLi64(N, OpVT);
+ } else if (Opc == ISD::SRA) {
+ if (OpVT == MVT::i64)
+ return SelectSRAi64(N, OpVT);
+ } else if (Opc == ISD::FNEG
+ && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
+ DebugLoc dl = N->getDebugLoc();
+ // Check if the pattern is a special form of DFNMS:
+ // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getOpcode() == ISD::FSUB) {
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == ISD::FMUL) {
+ unsigned Opc = SPU::DFNMSf64;
+ if (OpVT == MVT::v2f64)
+ Opc = SPU::DFNMSv2f64;
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ Op00.getOperand(0),
+ Op00.getOperand(1),
+ Op0.getOperand(1));
+ }
+ }
+
+ SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64);
+ SDNode *signMask = 0;
+ unsigned Opc = SPU::XORfneg64;
+
+ if (OpVT == MVT::f64) {
+ signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl);
+ } else if (OpVT == MVT::v2f64) {
+ Opc = SPU::XORfnegvec;
+ signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl,
+ MVT::v2i64,
+ negConst, negConst).getNode());
+ }
+
+ return CurDAG->getMachineNode(Opc, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (Opc == ISD::FABS) {
+ if (OpVT == MVT::f64) {
+ SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl);
+ return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ } else if (OpVT == MVT::v2f64) {
+ SDValue absConst = CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64);
+ SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
+ absConst, absConst);
+ SDNode *signMask = emitBuildVector(absVec.getNode());
+ return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT,
+ N->getOperand(0), SDValue(signMask, 0));
+ }
+ } else if (Opc == SPUISD::LDRESULT) {
+ // Custom select instructions for LDRESULT
+ EVT VT = N->getValueType(0);
+ SDValue Arg = N->getOperand(0);
+ SDValue Chain = N->getOperand(1);
+ SDNode *Result;
+
+ Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT,
+ MVT::Other, Arg,
+ getRC( VT.getSimpleVT()), Chain);
+ return Result;
+
+ } else if (Opc == SPUISD::IndirectAddr) {
+ // Look at the operands: SelectCode() will catch the cases that aren't
+ // specifically handled here.
+ //
+ // SPUInstrInfo catches the following patterns:
+ // (SPUindirect (SPUhi ...), (SPUlo ...))
+ // (SPUindirect $sp, imm)
+ EVT VT = N->getValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ RegisterSDNode *RN;
+
+ if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo)
+ || (Op0.getOpcode() == ISD::Register
+ && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0
+ && RN->getReg() != SPU::R1))) {
+ NewOpc = SPU::Ar32;
+ Ops[1] = Op1;
+ if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
+ Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT);
+ if (isInt<10>(CN->getSExtValue())) {
+ NewOpc = SPU::AIr32;
+ Ops[1] = Op1;
+ } else {
+ Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl,
+ N->getValueType(0),
+ Op1),
+ 0);
+ }
+ }
+ Ops[0] = Op0;
+ n_ops = 2;
+ }
+ }
+
+ if (n_ops > 0) {
+ if (N->hasOneUse())
+ return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops);
+ else
+ return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops);
+ } else
+ return SelectCode(N);
+}
+
+/*!
+ * Emit the instruction sequence for i64 left shifts. The basic algorithm
+ * is to fill the bottom two word slots with zeros so that zeros are shifted
+ * in as the entire quadword is shifted left.
+ *
+ * \note This code could also be used to implement v2i64 shl.
+ *
+ * @param Op The shl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0;
+ SDValue SelMaskVal;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+ SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16);
+ SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal);
+ ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0, OpVT));
+ VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(ZeroFill, 0),
+ SDValue(VecOp0, 0),
+ SDValue(SelMask, 0));
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
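+
+// For example, a constant shift amount of 35 splits into bytes = 35 >> 3 = 4
+// and bits = 35 & 7 = 3, producing a SHLQBYI by 4 bytes followed by a
+// SHLQBII by 3 bits.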
+
+/*!
+ * Emit the instruction sequence for i64 logical right shifts.
+ *
+ * @param Op The srl operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) {
+ SDValue Op0 = N->getOperand(0);
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ SDNode *VecOp0, *Shift = 0;
+ DebugLoc dl = N->getDebugLoc();
+
+ VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT,
+ Op0, getRC(MVT::v2i64) );
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT,
+ SDValue(VecOp0, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : VecOp0), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *Bytes =
+ CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(3, ShiftAmtVT));
+ SDNode *Bits =
+ CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT,
+ ShiftAmt,
+ CurDAG->getTargetConstant(7, ShiftAmtVT));
+
+ // Ensure that the shift amounts are negated!
+ Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bytes, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ SDValue(Bits, 0),
+ CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT,
+ SDValue(VecOp0, 0), SDValue(Bytes, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(Bits, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
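+
+// For a variable shift the byte and bit counts are negated with SFI (0 - x)
+// because the ROTQMBY/ROTQMBI quadword shift-right instructions take a
+// negated count; a shift amount of 13, for instance, becomes -1 bytes and
+// -5 bits.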
+
+/*!
+ * Emit the instruction sequence for i64 arithmetic right shifts.
+ *
+ * @param Op The sra operand
+ * @param OpVT Op's machine value type (doesn't need to be passed, but
+ * makes life easier.)
+ * @return The SDNode with the entire instruction sequence
+ */
+SDNode *
+SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) {
+ // Promote Op0 to vector
+ EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(),
+ OpVT, (128 / OpVT.getSizeInBits()));
+ SDValue ShiftAmt = N->getOperand(1);
+ EVT ShiftAmtVT = ShiftAmt.getValueType();
+ DebugLoc dl = N->getDebugLoc();
+
+ SDNode *VecOp0 =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ VecVT, N->getOperand(0), getRC(MVT::v2i64));
+
+ SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT);
+ SDNode *SignRot =
+ CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64,
+ SDValue(VecOp0, 0), SignRotAmt);
+ SDNode *UpperHalfSign =
+ CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32));
+
+ SDNode *UpperHalfSignMask =
+ CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0));
+ SDNode *UpperLowerMask =
+ CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT,
+ CurDAG->getTargetConstant(0xff00ULL, MVT::i16));
+ SDNode *UpperLowerSelect =
+ CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT,
+ SDValue(UpperHalfSignMask, 0),
+ SDValue(VecOp0, 0),
+ SDValue(UpperLowerMask, 0));
+
+ SDNode *Shift = 0;
+
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) {
+ unsigned bytes = unsigned(CN->getZExtValue()) >> 3;
+ unsigned bits = unsigned(CN->getZExtValue()) & 7;
+
+ if (bytes > 0) {
+ bytes = 31 - bytes;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT,
+ SDValue(UpperLowerSelect, 0),
+ CurDAG->getTargetConstant(bytes, ShiftAmtVT));
+ }
+
+ if (bits > 0) {
+ bits = 8 - bits;
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT,
+ SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0),
+ CurDAG->getTargetConstant(bits, ShiftAmtVT));
+ }
+ } else {
+ SDNode *NegShift =
+ CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT,
+ ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT));
+
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT,
+ SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0));
+ Shift =
+ CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT,
+ SDValue(Shift, 0), SDValue(NegShift, 0));
+ }
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(Shift, 0), getRC(MVT::i64));
+}
+
+/*!
+ Do the magic necessary to load an i64 constant
+ */
+SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT,
+ DebugLoc dl) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(N);
+ return SelectI64Constant(CN->getZExtValue(), OpVT, dl);
+}
+
+SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
+ DebugLoc dl) {
+ EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2);
+ SDValue i64vec =
+ SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl);
+
+ // Here's where it gets interesting, because we have to parse out the
+ // subtree handed back in i64vec:
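+  //
+  // Three shapes can come back: a BITCAST of a build_vector (the degenerate
+  // case where the upper and lower halves of the splat are identical), a
+  // SHUFB of two (possibly bitcast) build_vectors and a shuffle mask, or a
+  // plain BUILD_VECTOR; each is turned into machine nodes below.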
+
+ if (i64vec.getOpcode() == ISD::BITCAST) {
+ // The degenerate case where the upper and lower bits in the splat are
+ // identical:
+ SDValue Op0 = i64vec.getOperand(0);
+
+ ReplaceUses(i64vec, Op0);
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(Op0.getNode()), 0),
+ getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == SPUISD::SHUFB) {
+ SDValue lhs = i64vec.getOperand(0);
+ SDValue rhs = i64vec.getOperand(1);
+ SDValue shufmask = i64vec.getOperand(2);
+
+ if (lhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(lhs, lhs.getOperand(0));
+ lhs = lhs.getOperand(0);
+ }
+
+ SDNode *lhsNode = (lhs.getNode()->isMachineOpcode()
+ ? lhs.getNode()
+ : emitBuildVector(lhs.getNode()));
+
+ if (rhs.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(rhs, rhs.getOperand(0));
+ rhs = rhs.getOperand(0);
+ }
+
+ SDNode *rhsNode = (rhs.getNode()->isMachineOpcode()
+ ? rhs.getNode()
+ : emitBuildVector(rhs.getNode()));
+
+ if (shufmask.getOpcode() == ISD::BITCAST) {
+ ReplaceUses(shufmask, shufmask.getOperand(0));
+ shufmask = shufmask.getOperand(0);
+ }
+
+ SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode()
+ ? shufmask.getNode()
+ : emitBuildVector(shufmask.getNode()));
+
+ SDValue shufNode =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue(lhsNode, 0), SDValue(rhsNode, 0),
+ SDValue(shufMaskNode, 0));
+ HandleSDNode Dummy(shufNode);
+ SDNode *SN = SelectCode(Dummy.getValue().getNode());
+ if (SN == 0) SN = Dummy.getValue().getNode();
+
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl,
+ OpVT, SDValue(SN, 0), getRC(MVT::i64));
+ } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
+ return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT,
+ SDValue(emitBuildVector(i64vec.getNode()), 0),
+ getRC(MVT::i64));
+ } else {
+ report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec"
+                       " condition");
+ }
+}
+
+/// createSPUISelDag - This pass converts a legalized DAG into a
+/// SPU-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) {
+ return new SPUDAGToDAGISel(TM);
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
new file mode 100644
index 000000000000..062374127e2f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -0,0 +1,3259 @@
+//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUTargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUISelLowering.h"
+#include "SPUTargetMachine.h"
+#include "SPUFrameLowering.h"
+#include "SPUMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ // Byte offset of the preferred slot (counted from the MSB)
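+  // (i32 and wider scalars already sit at byte 0 of the preferred word; an
+  //  i16 occupies bytes 2-3 and an i8/i1 occupies byte 3, hence the offsets.)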
+ int prefslotOffset(EVT VT) {
+ int retval=0;
+ if (VT==MVT::i1) retval=3;
+ if (VT==MVT::i8) retval=3;
+ if (VT==MVT::i16) retval=2;
+
+ return retval;
+ }
+
+ //! Expand a library call into an actual call DAG node
+ /*!
+ \note
+ This code is taken from SelectionDAGLegalize, since it is not exposed as
+ part of the LLVM SelectionDAG API.
+ */
+
+ SDValue
+ ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
+ bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
+ // The input chain to this libcall is the entry node of the function.
+ // Legalizing the call will automatically add the previous call to the
+ // dependence.
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op.getOperand(i).getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op.getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ Type *RetTy =
+ Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC),
+ /*isTailCall=*/false,
+ /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Op.getDebugLoc());
+
+ return CallInfo.first;
+ }
+}
+
+SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
+ : TargetLowering(TM, new TargetLoweringObjectFileELF()),
+ SPUTM(TM) {
+
+ // Use _setjmp/_longjmp instead of setjmp/longjmp.
+ setUseUnderscoreSetJmp(true);
+ setUseUnderscoreLongJmp(true);
+
+ // Set RTLIB libcall names as used by SPU:
+ setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
+
+ // Set up the SPU's register classes:
+ addRegisterClass(MVT::i8, SPU::R8CRegisterClass);
+ addRegisterClass(MVT::i16, SPU::R16CRegisterClass);
+ addRegisterClass(MVT::i32, SPU::R32CRegisterClass);
+ addRegisterClass(MVT::i64, SPU::R64CRegisterClass);
+ addRegisterClass(MVT::f32, SPU::R32FPRegisterClass);
+ addRegisterClass(MVT::f64, SPU::R64FPRegisterClass);
+ addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
+
+ // SPU has no sign or zero extended loads for i1, i8, i16:
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+
+ setTruncStoreAction(MVT::i128, MVT::i64, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i32, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i128, MVT::i8, Expand);
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // SPU constant load actions are custom lowered:
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
+
+ // SPU's loads and stores have to be custom lowered:
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+ // Expand the jumptable branches
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+
+ // Custom lower SELECT_CC for most cases, but expand by default
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
+
+ // SPU has no intrinsics for these particular operations:
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
+
+ // SPU has no division/remainder instructions
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+ setOperationAction(ISD::SDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
+
+ // We don't support sin/cos/sqrt/fmod
+ setOperationAction(ISD::FSIN , MVT::f64, Expand);
+ setOperationAction(ISD::FCOS , MVT::f64, Expand);
+ setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FSIN , MVT::f32, Expand);
+ setOperationAction(ISD::FCOS , MVT::f32, Expand);
+ setOperationAction(ISD::FREM , MVT::f32, Expand);
+
+ // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
+ // for f32!)
+ setOperationAction(ISD::FSQRT, MVT::f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+
+ // SPU can do rotate right and left, so legalize it... but customize for i8
+ // because instructions don't exist.
+
+ // FIXME: Change from "expand" to appropriate type once ROTR is supported in
+ // .td files.
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
+ setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/);
+
+ setOperationAction(ISD::ROTL, MVT::i32, Legal);
+ setOperationAction(ISD::ROTL, MVT::i16, Legal);
+ setOperationAction(ISD::ROTL, MVT::i8, Custom);
+
+ // SPU has no native version of shift left/right for i8
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+
+ // Make these operations legal and handle them during instruction selection:
+ setOperationAction(ISD::SHL, MVT::i64, Legal);
+ setOperationAction(ISD::SRL, MVT::i64, Legal);
+ setOperationAction(ISD::SRA, MVT::i64, Legal);
+
+ // Custom lower i8, i32 and i64 multiplications
+ setOperationAction(ISD::MUL, MVT::i8, Custom);
+ setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction(ISD::MUL, MVT::i64, Legal);
+
+ // Expand double-width multiplication
+ // FIXME: It would probably be reasonable to support some of these operations
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
+ // Need to custom handle (some) common i8, i64 math ops
+ setOperationAction(ISD::ADD, MVT::i8, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Legal);
+ setOperationAction(ISD::SUB, MVT::i8, Custom);
+ setOperationAction(ISD::SUB, MVT::i64, Legal);
+
+  // SPU does not have BSWAP. It does have i32 support for CTLZ.
+ // CTPOP has to be custom lowered.
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
+
+ setOperationAction(ISD::CTTZ , MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand);
+
+ setOperationAction(ISD::CTLZ , MVT::i8, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ , MVT::i32, Legal);
+ setOperationAction(ISD::CTLZ , MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ , MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand);
+
+ // SPU has a version of select that implements (a&~c)|(b&c), just like
+ // select ought to work:
+ setOperationAction(ISD::SELECT, MVT::i8, Legal);
+ setOperationAction(ISD::SELECT, MVT::i16, Legal);
+ setOperationAction(ISD::SELECT, MVT::i32, Legal);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::i8, Legal);
+ setOperationAction(ISD::SETCC, MVT::i16, Legal);
+ setOperationAction(ISD::SETCC, MVT::i32, Legal);
+ setOperationAction(ISD::SETCC, MVT::i64, Legal);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+
+ // Custom lower i128 -> i64 truncates
+ setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
+
+ // Custom lower i32/i64 -> i128 sign extend
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
+ // to expand to a libcall, hence the custom lowering:
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);
+
+ // FDIV on SPU requires custom lowering
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall
+
+ // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+
+ setOperationAction(ISD::BITCAST, MVT::i32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f32, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::f64, Legal);
+
+ // We cannot sextinreg(i1). Expand to shifts.
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // We want to legalize GlobalAddress and ConstantPool nodes into the
+ // appropriate instructions to materialize the address.
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+ ++sctype) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;
+
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ }
+
+ // VASTART needs to be custom lowered to use the VarArgsFrameIndex
+ setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand);
+
+ // Cell SPU has instructions for converting between i64 and fp.
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+
+ // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
+
+ // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
+ setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
+
+ // First set operation action for all vector types to expand. Then we
+ // will selectively turn on ones that can be effectively codegen'd.
+ addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
+ addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
+
+ for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
+
+ // Set operation actions to legal types only.
+ if (!isTypeLegal(VT)) continue;
+
+ // add/sub are legal for all supported vector VT's.
+ setOperationAction(ISD::ADD, VT, Legal);
+ setOperationAction(ISD::SUB, VT, Legal);
+ // mul has to be custom lowered.
+ setOperationAction(ISD::MUL, VT, Legal);
+
+ setOperationAction(ISD::AND, VT, Legal);
+ setOperationAction(ISD::OR, VT, Legal);
+ setOperationAction(ISD::XOR, VT, Legal);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Legal);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ // These operations need to be expanded:
+ setOperationAction(ISD::SDIV, VT, Expand);
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::UDIV, VT, Expand);
+ setOperationAction(ISD::UREM, VT, Expand);
+
+ // Expand all trunc stores
+ for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
+ MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j;
+ setTruncStoreAction(VT, TargetVT, Expand);
+ }
+
+ // Custom lower build_vector, constant pool spills, insert and
+ // extract vector elements:
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+
+ setOperationAction(ISD::SHL, MVT::v2i64, Expand);
+
+ setOperationAction(ISD::AND, MVT::v16i8, Custom);
+ setOperationAction(ISD::OR, MVT::v16i8, Custom);
+ setOperationAction(ISD::XOR, MVT::v16i8, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
+
+ setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
+
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?
+
+ setStackPointerRegisterToSaveRestore(SPU::R1);
+
+ // We have target-specific dag combine patterns for the following nodes:
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+
+ setMinFunctionAlignment(3);
+
+ computeRegisterProperties();
+
+ // Set pre-RA register scheduler default to BURR, which produces slightly
+ // better code than the default (could also be TDRR, but TargetLowering.h
+ // needs a mod to support that model):
+ setSchedulingPreference(Sched::RegPressure);
+}
+
+const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default: return 0;
+ case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG";
+ case SPUISD::Hi: return "SPUISD::Hi";
+ case SPUISD::Lo: return "SPUISD::Lo";
+ case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr";
+ case SPUISD::AFormAddr: return "SPUISD::AFormAddr";
+ case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr";
+ case SPUISD::LDRESULT: return "SPUISD::LDRESULT";
+ case SPUISD::CALL: return "SPUISD::CALL";
+ case SPUISD::SHUFB: return "SPUISD::SHUFB";
+ case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK";
+ case SPUISD::CNTB: return "SPUISD::CNTB";
+ case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC";
+ case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT";
+ case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS";
+ case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES";
+ case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL";
+ case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR";
+ case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT";
+ case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS";
+ case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK";
+ case SPUISD::SELB: return "SPUISD::SELB";
+ case SPUISD::ADD64_MARKER: return "SPUISD::ADD64_MARKER";
+ case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER";
+ case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER";
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Return the Cell SPU's SETCC result type
+//===----------------------------------------------------------------------===//
+
+EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
+ // i8, i16 and i32 are valid SETCC result types
+ MVT::SimpleValueType retval;
+
+ switch(VT.getSimpleVT().SimpleTy){
+ case MVT::i1:
+ case MVT::i8:
+ retval = MVT::i8; break;
+ case MVT::i16:
+ retval = MVT::i16; break;
+ case MVT::i32:
+ default:
+ retval = MVT::i32;
+ }
+ return retval;
+}
+
+//===----------------------------------------------------------------------===//
+// Calling convention code:
+//===----------------------------------------------------------------------===//
+
+#include "SPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// LowerOperation implementation
+//===----------------------------------------------------------------------===//
+
+/// Custom lower loads for CellSPU
+/*!
+ All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to rotate to extract the requested element.
+
+ For extending loads, we also want to ensure that the following sequence is
+ emitted, e.g. for MVT::f32 extending load to MVT::f64:
+
+\verbatim
+%1 v16i8,ch = load
+%2 v16i8,ch = rotate %1
+%3 v4f32,ch = bitconvert %2
+%4 f32 = vec2prefslot %3
+%5 f64 = fp_extend %4
+\endverbatim
+*/
+static SDValue
+LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ LoadSDNode *LN = cast<LoadSDNode>(Op);
+ SDValue the_chain = LN->getChain();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ EVT InVT = LN->getMemoryVT();
+ EVT OutVT = Op.getValueType();
+ ISD::LoadExtType ExtType = LN->getExtensionType();
+ unsigned alignment = LN->getAlignment();
+ int pso = prefslotOffset(InVT);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
+ (128 / InVT.getSizeInBits()));
+
+ // two sanity checks
+ assert( LN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+ // clean aligned loads can be selected as-is
+ if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ // Get pointerinfos to the memory chunk(s) that contain the data to load
+ uint64_t mpi_offset = LN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
+
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - pso);
+
+ if (rotamt < 0)
+ rotamt += 16;
+
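+      // rotamt is the element's byte offset within its 16-byte block minus
+      // the preferred-slot offset, wrapped into [0, 16); rotating the loaded
+      // quadword left by this many bytes lands the element in the preferred
+      // slot.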
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
+ } else {
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -pso;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(rotamt, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(-pso, PtrVT));
+ }
+
+ // Do the load as a i128 to allow possible shifting
+ SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), false, 16);
+
+  // When the size is no greater than the alignment, we get all the data with
+  // just one load
+ if (alignment >= InVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
+ low.getValue(0), rotate);
+
+ // Convert the loaded v16i8 vector to the appropriate vector type
+ // specified by the operand:
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT, (128 / InVT.getSizeInBits()));
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
+ DAG.getNode(ISD::BITCAST, dl, vecVT, result));
+ }
+  // When the alignment is less than the size, we might need (known only at
+  // run time) two loads.
+  // TODO: if the memory address is composed only of constants, we have
+  // extra knowledge and might avoid the second load
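+  // (For example, an i64 load with 4-byte alignment may start at byte 12 of
+  //  one 16-byte block and spill four bytes into the next.)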
+ else {
+ // storage position offset from lower 16 byte aligned memory chunk
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
+    // get a register full of ones. (this implementation is a workaround: LLVM
+    // cannot handle 128-bit signed int constants)
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(16, PtrVT)),
+ highMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(), false,
+ 16);
+
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ high.getValue(1));
+
+    // Shift the (possible) high part right to compensate for the
+    // misalignment. If there is no high part (i.e. the value is i64 and the
+    // offset is 4), this will zero out the high value.
+ high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
+ DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset
+ ));
+
+    // Shift the low part similarly.
+ low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
+
+ // Merge the two parts
+ result = DAG.getNode(ISD::BITCAST, dl, vecVT,
+ DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
+
+ if (!InVT.isVector()) {
+ result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
+ }
+
+ }
+ // Handle extending loads by extending the scalar result:
+ if (ExtType == ISD::SEXTLOAD) {
+ result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::ZEXTLOAD) {
+ result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
+ } else if (ExtType == ISD::EXTLOAD) {
+ unsigned NewOpc = ISD::ANY_EXTEND;
+
+ if (OutVT.isFloatingPoint())
+ NewOpc = ISD::FP_EXTEND;
+
+ result = DAG.getNode(NewOpc, dl, OutVT, result);
+ }
+
+ SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
+ SDValue retops[2] = {
+ result,
+ the_chain
+ };
+
+ result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
+ retops, sizeof(retops) / sizeof(retops[0]));
+ return result;
+}
+
+/// Custom lower stores for CellSPU
+/*!
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements
+ within a 16-byte block, we have to generate a shuffle to insert the
+ requested element into its place, then store the resulting block.
+ */
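+/*!
+ A sketch of the sequence built below for the aligned case (the exact node
+ types and operand order follow the code):
+
+\verbatim
+%1 v16i8,ch = load (the 16-byte block containing the target slot)
+%2 v16i8 = scalar_to_vector %value
+%3 v16i8 = shuffle_mask %insert_offset
+%4 v16i8 = shufb %2, %1, %3
+%5 ch = store %4
+\endverbatim
+ */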
+static SDValue
+LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ StoreSDNode *SN = cast<StoreSDNode>(Op);
+ SDValue Value = SN->getValue();
+ EVT VT = Value.getValueType();
+ EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned alignment = SN->getAlignment();
+ SDValue result;
+ EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
+ (128 / StVT.getSizeInBits()));
+  // Get pointer infos to the memory chunk(s) touched by the store
+ uint64_t mpi_offset = SN->getPointerInfo().Offset;
+ mpi_offset -= mpi_offset%16;
+ MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
+ MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);
+
+
+ // two sanity checks
+ assert( SN->getAddressingMode() == ISD::UNINDEXED
+          && "we should get only UNINDEXED addresses");
+  // clean aligned stores can be selected as-is
+ if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
+ return SDValue();
+
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
+
+ if ((alignment%16) == 0) {
+ ConstantSDNode *CN;
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Load the lower part of the memory to which to store.
+ SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
+ lowMemPtr, SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
+
+ // if we don't need to store over the 16 byte boundary, one store suffices
+ if (alignment >= StVT.getSizeInBits()/8) {
+ // Update the chain
+ the_chain = low.getValue(1);
+
+ LoadSDNode *LN = cast<LoadSDNode>(low);
+ SDValue theValue = SN->getValue();
+
+ if (StVT != VT
+ && (theValue.getOpcode() == ISD::AssertZext
+ || theValue.getOpcode() == ISD::AssertSext)) {
+ // Drill down and get the value for zero- and sign-extended
+ // quantities
+ theValue = theValue.getOperand(0);
+ }
+
+ // If the base pointer is already a D-form address, then just create
+    // a new D-form address with a slot offset and the original base pointer.
+ // Otherwise generate a D-form address with the slot offset relative
+ // to the stack pointer, which is always aligned.
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
+ insertEltOffs);
+ SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
+ theValue);
+
+ result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
+ vectorizeOp, low,
+ DAG.getNode(ISD::BITCAST, dl,
+ MVT::v4i32, insertEltOp));
+
+ result = DAG.getStore(the_chain, dl, result, basePtr,
+ lowMemPtr,
+ LN->isVolatile(), LN->isNonTemporal(),
+ 16);
+
+ }
+ // do the store when it might cross the 16 byte memory access boundary.
+ else {
+ // TODO issue a warning if SN->isVolatile()== true? This is likely not
+ // what the user wanted.
+
+    // address offset from the nearest lower 16-byte aligned address
+ SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
+ SN->getBasePtr(),
+ DAG.getConstant(0xf, MVT::i32));
+ // 16 - offset
+ SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ offset);
+ // 16 - sizeof(Value)
+ SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant( 16, MVT::i32),
+ DAG.getConstant( VT.getSizeInBits()/8,
+ MVT::i32));
+    // get a register full of ones
+ SDValue ones = DAG.getConstant(-1, MVT::v4i32);
+ ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
+
+ // Create the 128 bit masks that have ones where the data to store is
+ // located.
+ SDValue lowmask, himask;
+    // If the value to store doesn't fill an entire 128 bits, zero out the
+    // last bits of the mask so that only the value we want to store is
+    // masked. This is e.g. the case for a store of i32 with align 2.
+ if (!VT.isVector()){
+ Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
+ lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ surplus);
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);
+
+ }
+ else {
+ lowmask = ones;
+ Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
+ }
+    // this will be zero if no data goes to the high quad
+ himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
+ offset_compl);
+ lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
+ offset);
+
+ // Load in the old data and zero out the parts that will be overwritten with
+ // the new data to store.
+ SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(),
+ false, 16);
+ the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
+ hi.getValue(1));
+
+ low = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
+ hi = DAG.getNode(ISD::AND, dl, MVT::i128,
+ DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));
+
+ // Shift the Value to store into place. rlow contains the parts that go to
+ // the lower memory chunk, rhi has the parts that go to the upper one.
+ SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
+ rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
+ SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
+ offset_compl);
+
+ // Merge the old data and the new data and store the results
+    // Need to convert vectors here to integer as 'OR'ing floats asserts
+ rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
+ rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
+ DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));
+
+ low = DAG.getStore(the_chain, dl, rlow, basePtr,
+ lowMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ hi = DAG.getStore(the_chain, dl, rhi,
+ DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
+ DAG.getConstant( 16, PtrVT)),
+ highMemPtr,
+ SN->isVolatile(), SN->isNonTemporal(), 16);
+ result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
+ hi.getValue(0));
+ }
+
+ return result;
+}
+
+//! Generate the address of a constant pool entry.
+static SDValue
+LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ const Constant *C = CP->getConstVal();
+ SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ // Just return the SDValue with the constant pool address in it.
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerConstantPool: Relocation model other than static"
+ " not supported.");
+}
+
+//! Alternate entry point for generating the address of a constant pool entry
+SDValue
+SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
+ return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
+}
+
+static SDValue
+LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ const TargetMachine &TM = DAG.getTarget();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ }
+
+ llvm_unreachable("LowerJumpTable: Relocation model other than static"
+ " not supported.");
+}
+
+static SDValue
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
+ EVT PtrVT = Op.getValueType();
+ GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
+ const TargetMachine &TM = DAG.getTarget();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ if (!ST->usingLargeMem()) {
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
+ } else {
+ SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
+ SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
+ return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
+ }
+ } else {
+ report_fatal_error("LowerGlobalAddress: Relocation model other than static"
+                       " not supported.");
+ /*NOTREACHED*/
+ }
+}
+
+//! Custom lower double precision floating point constants
+static SDValue
+LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ // FIXME there is no actual debug info here
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (VT == MVT::f64) {
+ ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
+
+ assert((FP != 0) &&
+ "LowerConstantFP: Node is not ConstantFPSDNode");
+
+ uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
+ SDValue T = DAG.getConstant(dbits, MVT::i64);
+ SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
+ }
+
+ return SDValue();
+}
+
+SDValue
+SPUTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg>
+ &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+ const {
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
+
+ unsigned ArgOffset = SPUFrameLowering::minStackSize();
+ unsigned ArgRegIdx = 0;
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
+ // Add DAG nodes to load the arguments or copy them out of registers.
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
+ EVT ObjectVT = Ins[ArgNo].VT;
+ unsigned ObjSize = ObjectVT.getSizeInBits()/8;
+ SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
+
+ if (VA.isRegLoc()) {
+ const TargetRegisterClass *ArgRegClass;
+
+ switch (ObjectVT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerFormalArguments Unhandled argument type: " +
+ Twine(ObjectVT.getEVTString()));
+ case MVT::i8:
+ ArgRegClass = &SPU::R8CRegClass;
+ break;
+ case MVT::i16:
+ ArgRegClass = &SPU::R16CRegClass;
+ break;
+ case MVT::i32:
+ ArgRegClass = &SPU::R32CRegClass;
+ break;
+ case MVT::i64:
+ ArgRegClass = &SPU::R64CRegClass;
+ break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
+ case MVT::f32:
+ ArgRegClass = &SPU::R32FPRegClass;
+ break;
+ case MVT::f64:
+ ArgRegClass = &SPU::R64FPRegClass;
+ break;
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v2i64:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ ArgRegClass = &SPU::VECREGRegClass;
+ break;
+ }
+
+ unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
+ ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
+ ++ArgRegIdx;
+ } else {
+ // We need to load the argument to a virtual register if we determined
+ // above that we ran out of physical registers of the appropriate type
+ // or we're forced to do vararg
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
+ SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
+ ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
+ false, false, false, 0);
+ ArgOffset += StackSlotSize;
+ }
+
+ InVals.push_back(ArgVal);
+ // Update the chain
+ Chain = ArgVal.getOperand(0);
+ }
+
+ // vararg handling:
+ if (isVarArg) {
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const uint16_t ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+ // size of ArgRegs array
+ const unsigned NumArgRegs = 77;
+
+ // We will spill (79-3)+1 registers to the stack
+ SmallVector<SDValue, 79-3+1> MemOps;
+
+ // Create the frame slot
+ for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
+ FuncInfo->setVarArgsFrameIndex(
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
+ SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
+ SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
+ SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
+ false, false, 0);
+ Chain = Store.getOperand(0);
+ MemOps.push_back(Store);
+
+ // Increment address by stack slot size for the next stored argument
+ ArgOffset += StackSlotSize;
+ }
+ if (!MemOps.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOps[0], MemOps.size());
+ }
+
+ return Chain;
+}
+
+/// isLSAAddress - Return the immediate to use if the specified
+/// value is representable as an LSA address.
+static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) return 0;
+
+ int Addr = C->getZExtValue();
+ if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
+ (Addr << 14 >> 14) != Addr)
+ return 0; // Top 14 bits have to be sext of immediate.
+
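+  // The returned immediate encodes a word address (the two low bits are
+  // implied zero), hence the shift right by two.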
+ return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
+}
+
+SDValue
+SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ // CellSPU target does not yet support tail call optimization.
+ isTailCall = false;
+
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ unsigned NumOps = Outs.size();
+ unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
+
+ // Handy pointer type
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // Set up a copy of the stack pointer for use loading and storing any
+ // arguments that may not fit in the registers available for argument
+ // passing.
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
+
+ // Figure out which arguments are going to go in registers, and which in
+ // memory.
+ unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
+ unsigned ArgRegIdx = 0;
+
+ // Keep track of registers passing arguments
+ std::vector<std::pair<unsigned, SDValue> > RegsToPass;
+ // And the arguments passed on the stack
+ SmallVector<SDValue, 8> MemOpChains;
+
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
+
+ // PtrOff will be used to store the current argument to the stack if a
+ // register cannot be found for it.
+ SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
+ PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
+
+ switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::i128:
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::v2i64:
+ case MVT::v2f64:
+ case MVT::v4f32:
+ case MVT::v4i32:
+ case MVT::v8i16:
+ case MVT::v16i8:
+ if (ArgRegIdx != NumArgRegs) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo(),
+ false, false, 0));
+ ArgOffset += StackSlotSize;
+ }
+ break;
+ }
+ }
+
+ // Accumulate how many bytes are to be pushed on the stack, including the
+ // linkage area, and parameter passing area. According to the SPU ABI,
+ // we minimally need space for [LR] and [SP].
+ unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();
+
+ // Insert a call sequence start
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
+ true));
+
+ if (!MemOpChains.empty()) {
+ // Adjust the stack pointer for the stack arguments.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ unsigned CallOpc = SPUISD::CALL;
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
+
+ if (!ST->usingLargeMem()) {
+ // Turn calls to targets that are defined (i.e., have bodies) into BRSL
+ // style calls, otherwise, external symbols are BRASL calls. This assumes
+ // that declared/defined symbols are in the same compilation unit and can
+ // be reached through PC-relative jumps.
+ //
+ // NOTE:
+ // This may be an unsafe assumption for JIT and really large compilation
+ // units.
+ if (GV->isDeclaration()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
+ }
+ } else {
+ // "Large memory" mode: Turn all calls into indirect calls with a X-form
+ // address pairs:
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
+ }
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ EVT CalleeVT = Callee.getValueType();
+ SDValue Zero = DAG.getConstant(0, PtrVT);
+ SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
+ Callee.getValueType());
+
+ if (!ST->usingLargeMem()) {
+ Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
+ } else {
+ Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
+ }
+ } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
+ // If this is an absolute destination address that appears to be a legal
+ // local store address, use the munged value.
+ Callee = SDValue(Dest, 0);
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
+ &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // If the function returns void, just return the chain.
+ if (Ins.empty())
+ return Chain;
+
+ // Now handle the return value(s)
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);
+
+
+ // If the call has results, copy the values out of the ret val registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign VA = RVLocs[i];
+
+ SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
+ InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ InVals.push_back(Val);
+ }
+
+ return Chain;
+}
+
+SDValue
+SPUTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const {
+
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
+
+ // If this is the first return lowered for this function, add the regs to the
+ // liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ OutVals[i], Flag);
+ Flag = Chain.getValue(1);
+ }
+
+ if (Flag.getNode())
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ else
+ return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Vector related lowering:
+//===----------------------------------------------------------------------===//
+
+static ConstantSDNode *
+getVecImm(SDNode *N) {
+ SDValue OpVal(0, 0);
+
+ // Check to see if this buildvec has a single non-undef value in its elements.
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ else if (OpVal != N->getOperand(i))
+ return 0;
+ }
+
+ if (OpVal.getNode() != 0) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
+ return CN;
+ }
+ }
+
+ return 0;
+}
+
+/// get_vec_u18imm - Test if this vector is filled with the same value and that
+/// value fits into an unsigned 18-bit constant, and if so, return the
+/// constant.
+SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value <= 0x3ffff)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i16imm - Test if this vector is filled with the same value and that
+/// value fits into a signed 16-bit constant, and if so, return the constant.
+SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i10imm - Test if this vector is filled with the same value and that
+/// value fits into a signed 10-bit constant, and if so, return the constant.
+SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int64_t Value = CN->getSExtValue();
+ if (ValueType == MVT::i64) {
+ uint64_t UValue = CN->getZExtValue();
+ uint32_t upper = uint32_t(UValue >> 32);
+ uint32_t lower = uint32_t(UValue);
+ if (upper != lower)
+ return SDValue();
+ Value = Value >> 32;
+ }
+ if (isInt<10>(Value))
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_vec_i8imm - Test if this vector is filled with the same value and that
+/// value fits into a signed 8-bit constant, and if so, return the constant.
+///
+/// @note: The incoming vector is v16i8 because that's the only way we can load
+/// constant vectors. Thus, we test to see if the upper and lower bytes are the
+/// same value.
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ int Value = (int) CN->getZExtValue();
+ if (ValueType == MVT::i16
+ && Value <= 0xffff /* truncated from uint64_t */
+ && ((short) Value >> 8) == ((short) Value & 0xff))
+ return DAG.getTargetConstant(Value & 0xff, ValueType);
+ else if (ValueType == MVT::i8
+ && (Value & 0xff) == Value)
+ return DAG.getTargetConstant(Value, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_ILHUvec_imm - Test if this vector is filled with the same value and
+/// that value is a 16-bit constant shifted into the upper halfword of a word
+/// (an ILHU-style immediate), and if so, return the value shifted right by 16.
+SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ uint64_t Value = CN->getZExtValue();
+ if ((ValueType == MVT::i32
+ && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
+ || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
+ return DAG.getTargetConstant(Value >> 16, ValueType);
+ }
+
+ return SDValue();
+}
+
+/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
+SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
+ }
+
+ return SDValue();
+}
+
+/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
+SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
+ if (ConstantSDNode *CN = getVecImm(N)) {
+ return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
+ }
+
+ return SDValue();
+}
+
+//! Lower a BUILD_VECTOR instruction creatively:
+static SDValue
+LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ DebugLoc dl = Op.getDebugLoc();
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
+ unsigned minSplatBits = EltVT.getSizeInBits();
+
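+  // This lowering never materializes splats narrower than 16 bits (8-bit
+  // splats are expanded through v8i16 below), so clamp the requested splat
+  // width accordingly.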
+ if (minSplatBits < 16)
+ minSplatBits = 16;
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ || minSplatBits < SplatBitSize)
+ return SDValue(); // Wasn't a constant vector or splat exceeded min
+
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
+ Twine(VT.getEVTString()));
+ /*NOTREACHED*/
+ case MVT::v4f32: {
+ uint32_t Value32 = uint32_t(SplatBits);
+ assert(SplatBitSize == 32
+ && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(Value32, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
+ }
+ case MVT::v2f64: {
+ uint64_t f64val = uint64_t(SplatBits);
+ assert(SplatBitSize == 64
+ && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
+ // NOTE: pretend the constant is an integer. LLVM won't load FP constants
+ SDValue T = DAG.getConstant(f64val, MVT::i64);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
+ }
+ case MVT::v16i8: {
+ // 8-bit constants have to be expanded to 16-bits
+ unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
+ return DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
+ }
+ case MVT::v8i16: {
+ unsigned short Value16 = SplatBits;
+ SDValue T = DAG.getConstant(Value16, EltVT);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.assign(8, T);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ }
+ case MVT::v4i32: {
+ SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
+ }
+ case MVT::v2i64: {
+ return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
+ }
+ }
+}
+
+/*!
+ Lower a v2i64 constant splat to a form the SPU can materialize: a replicated
+ 32-bit immediate when both halves match, a constant-pool load when both halves
+ are "special" patterns, or a SHUFB of the replicated halves otherwise.
+ */
+SDValue
+SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
+ DebugLoc dl) {
+ uint32_t upper = uint32_t(SplatVal >> 32);
+ uint32_t lower = uint32_t(SplatVal);
+
+ if (upper == lower) {
+ // Magic constant that can be matched by IL, ILA, et. al.
+ SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ Val, Val, Val, Val));
+ } else {
+ bool upper_special, lower_special;
+
+ // NOTE: This code creates common-case shuffle masks that can be easily
+ // detected as common expressions. It is not attempting to create highly
+ // specialized masks to replace any and all 0's, 0xff's and 0x80's.
+
+ // Detect if the upper or lower half is a special shuffle mask pattern:
+ upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
+ lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
+
+ // Both upper and lower are special, lower to a constant pool load:
+ if (lower_special && upper_special) {
+ SDValue UpperVal = DAG.getConstant(upper, MVT::i32);
+ SDValue LowerVal = DAG.getConstant(lower, MVT::i32);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ UpperVal, LowerVal, UpperVal, LowerVal);
+ return DAG.getNode(ISD::BITCAST, dl, OpVT, BV);
+ }
+
+ SDValue LO32;
+ SDValue HI32;
+ SmallVector<SDValue, 16> ShufBytes;
+ SDValue Result;
+
+ // Create lower vector if not a special pattern
+ if (!lower_special) {
+ SDValue LO32C = DAG.getConstant(lower, MVT::i32);
+ LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ LO32C, LO32C, LO32C, LO32C));
+ }
+
+ // Create upper vector if not a special pattern
+ if (!upper_special) {
+ SDValue HI32C = DAG.getConstant(upper, MVT::i32);
+ HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ HI32C, HI32C, HI32C, HI32C));
+ }
+
+ // If either upper or lower are special, then the two input operands are
+ // the same (basically, one of them is a "don't care")
+ if (lower_special)
+ LO32 = HI32;
+ if (upper_special)
+ HI32 = LO32;
+
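+    // Build the four 32-bit SHUFB control words one byte at a time. Ordinary
+    // index bytes (0x00..0x1f) copy bytes through from the two source vectors,
+    // while the special codes used below (0x80, 0xc0, 0xe0) make shufb emit
+    // 0x00, 0xff and 0x80 result bytes directly, so the "special" halves never
+    // need a materialized input vector.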
+ for (int i = 0; i < 4; ++i) {
+ uint64_t val = 0;
+ for (int j = 0; j < 4; ++j) {
+ SDValue V;
+ bool process_upper, process_lower;
+ val <<= 8;
+ process_upper = (upper_special && (i & 1) == 0);
+ process_lower = (lower_special && (i & 1) == 1);
+
+ if (process_upper || process_lower) {
+ if ((process_upper && upper == 0)
+ || (process_lower && lower == 0))
+ val |= 0x80;
+ else if ((process_upper && upper == 0xffffffff)
+ || (process_lower && lower == 0xffffffff))
+ val |= 0xc0;
+ else if ((process_upper && upper == 0x80000000)
+ || (process_lower && lower == 0x80000000))
+ val |= (j == 0 ? 0xe0 : 0x80);
+ } else
+ val |= i * 4 + j + ((i & 1) * 16);
+ }
+
+ ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
+ }
+
+ return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufBytes[0], ShufBytes.size()));
+ }
+}
+
+/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
+/// which the Cell can operate. The code inspects V3 to ascertain whether the
+/// permutation vector, V3, is monotonically increasing with one "exception"
+/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
+/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
+/// In either case, the net result is going to eventually invoke SHUFB to
+/// permute/shuffle the bytes from V1 and V2.
+/// \note
+/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
+/// generate the control word for byte/halfword/word insertion. This takes care
+/// of a single element move from V2 into V1.
+/// \note
+/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
+
+ // If we have a single element being moved from V1 to V2, this can be handled
+ // using the C*[DX] compute mask instructions, but the vector elements have
+ // to be monotonically increasing with one exception element, and the source
+ // slot of the element to move must be the same as the destination.
+ EVT VecVT = V1.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned EltsFromV2 = 0;
+ unsigned V2EltOffset = 0;
+ unsigned V2EltIdx0 = 0;
+ unsigned CurrElt = 0;
+ unsigned MaxElts = VecVT.getVectorNumElements();
+ unsigned PrevElt = 0;
+ bool monotonic = true;
+ bool rotate = true;
+  int rotamt = 0;
+ EVT maskVT; // which of the c?d instructions to use
+
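+  // Shuffle mask indices 0..MaxElts-1 refer to V1 and MaxElts..2*MaxElts-1 to
+  // V2, so V2EltIdx0 marks the first mask value that selects from V2.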
+ if (EltVT == MVT::i8) {
+ V2EltIdx0 = 16;
+ maskVT = MVT::v16i8;
+ } else if (EltVT == MVT::i16) {
+ V2EltIdx0 = 8;
+ maskVT = MVT::v8i16;
+ } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
+ V2EltIdx0 = 4;
+ maskVT = MVT::v4i32;
+ } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
+ V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
+ } else
+ llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
+
+ for (unsigned i = 0; i != MaxElts; ++i) {
+ if (SVN->getMaskElt(i) < 0)
+ continue;
+
+ unsigned SrcElt = SVN->getMaskElt(i);
+
+ if (monotonic) {
+ if (SrcElt >= V2EltIdx0) {
+ // TODO: optimize for the monotonic case when several consecutive
+        // elements are taken from V2. Do we ever get such a case?
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
+ V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
+ else
+ monotonic = false;
+ ++EltsFromV2;
+ } else if (CurrElt != SrcElt) {
+ monotonic = false;
+ }
+
+ ++CurrElt;
+ }
+
+ if (rotate) {
+ if (PrevElt > 0 && SrcElt < MaxElts) {
+ if ((PrevElt == SrcElt - 1)
+ || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
+ PrevElt = SrcElt;
+ } else {
+ rotate = false;
+ }
+ } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
+ // First time or after a "wrap around"
+ rotamt = SrcElt-i;
+ PrevElt = SrcElt;
+ } else {
+        // This isn't a rotation; it takes elements from vector 2.
+ rotate = false;
+ }
+ }
+ }
+
+ if (EltsFromV2 == 1 && monotonic) {
+ // Compute mask and shuffle
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+
+ // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
+ // R1 ($sp) is used here only as it is guaranteed to have last bits zero
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2EltOffset, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
+ // Use shuffle mask in SHUFB synthetic instruction:
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
+ ShufMaskOp);
+ } else if (rotate) {
+ if (rotamt < 0)
+      rotamt += MaxElts;
+ rotamt *= EltVT.getSizeInBits()/8;
+ return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
+ V1, DAG.getConstant(rotamt, MVT::i16));
+ } else {
+ // Convert the SHUFFLE_VECTOR mask's input element units to the
+ // actual bytes.
+ unsigned BytesPerElement = EltVT.getSizeInBits()/8;
+
+ SmallVector<SDValue, 16> ResultMask;
+ for (unsigned i = 0, e = MaxElts; i != e; ++i) {
+ unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
+
+ for (unsigned j = 0; j < BytesPerElement; ++j)
+ ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
+ }
+ SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
+ &ResultMask[0], ResultMask.size());
+ return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
+ }
+}
+
+static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Op0.getNode()->getOpcode() == ISD::Constant) {
+ // For a constant, build the appropriate constant vector, which will
+ // eventually simplify to a vector register load.
+
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
+ SmallVector<SDValue, 16> ConstVecValues;
+ EVT VT;
+ size_t n_copies;
+
+ // Create a constant vector:
+ switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected constant value type in "
+ "LowerSCALAR_TO_VECTOR");
+ case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
+ case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
+ case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
+ case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
+ case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
+ case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
+ }
+
+ SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
+ for (size_t j = 0; j < n_copies; ++j)
+ ConstVecValues.push_back(CValue);
+
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
+ &ConstVecValues[0], ConstVecValues.size());
+ } else {
+ // Otherwise, copy the value from one register to another:
+ switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ case MVT::f32:
+ case MVT::f64:
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
+ }
+ }
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ SDValue N = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue retval;
+
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+ // Constant argument:
+ int EltNo = (int) C->getZExtValue();
+
+ // sanity checks:
+ if (VT == MVT::i8 && EltNo >= 16)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
+ else if (VT == MVT::i16 && EltNo >= 8)
+ llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
+    else if (VT == MVT::i32 && EltNo >= 4)
+      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
+    else if (VT == MVT::i64 && EltNo >= 2)
+      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
+
+ if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
+ // i32 and i64: Element 0 is the preferred slot
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
+ }
+
+ // Need to generate shuffle mask and extract:
+ int prefslot_begin = -1, prefslot_end = -1;
+ int elt_byte = EltNo * VT.getSizeInBits() / 8;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid value type!");
+ case MVT::i8: {
+ prefslot_begin = prefslot_end = 3;
+ break;
+ }
+ case MVT::i16: {
+ prefslot_begin = 2; prefslot_end = 3;
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ prefslot_begin = 0; prefslot_end = 3;
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ prefslot_begin = 0; prefslot_end = 7;
+ break;
+ }
+ }
+
+ assert(prefslot_begin != -1 && prefslot_end != -1 &&
+ "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
+
+ unsigned int ShufBytes[16] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ for (int i = 0; i < 16; ++i) {
+      // zero fill upper part of preferred slot, don't care about the
+ // other slots:
+ unsigned int mask_val;
+ if (i <= prefslot_end) {
+ mask_val =
+ ((i < prefslot_begin)
+ ? 0x80
+ : elt_byte + (i - prefslot_begin));
+
+ ShufBytes[i] = mask_val;
+ } else
+ ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
+ }
+
+ SDValue ShufMask[4];
+ for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
+ unsigned bidx = i * 4;
+ unsigned int bits = ((ShufBytes[bidx] << 24) |
+ (ShufBytes[bidx+1] << 16) |
+ (ShufBytes[bidx+2] << 8) |
+ ShufBytes[bidx+3]);
+ ShufMask[i] = DAG.getConstant(bits, MVT::i32);
+ }
+
+ SDValue ShufMaskVec =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
+ N, N, ShufMaskVec));
+ } else {
+ // Variable index: Rotate the requested element into slot 0, then replicate
+ // slot 0 across the vector
+ EVT VecVT = N.getValueType();
+ if (!VecVT.isSimple() || !VecVT.isVector()) {
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
+ "vector type!");
+ }
+
+ // Make life easier by making sure the index is zero-extended to i32
+ if (Elt.getValueType() != MVT::i32)
+ Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
+
+ // Scale the index to a bit/byte shift quantity
+ APInt scaleFactor =
+ APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
+ unsigned scaleShift = scaleFactor.logBase2();
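+    // Each element occupies (16 / NumElements) bytes, so shifting the index
+    // left by log2 of that width turns an element number into the byte count
+    // that SHL_BYTES shifts by.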
+ SDValue vecShift;
+
+ if (scaleShift > 0) {
+ // Scale the shift factor:
+ Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
+ DAG.getConstant(scaleShift, MVT::i32));
+ }
+
+ vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);
+
+ // Replicate the bytes starting at byte 0 across the entire vector (for
+ // consistency with the notion of a unified register set)
+ SDValue replicate;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
+ "type");
+ /*NOTREACHED*/
+ case MVT::i8: {
+ SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i16: {
+ SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i32:
+ case MVT::f32: {
+ SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ factor, factor, factor, factor);
+ break;
+ }
+ case MVT::i64:
+ case MVT::f64: {
+ SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
+ SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
+ replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ loFactor, hiFactor, loFactor, hiFactor);
+ break;
+ }
+ }
+
+ retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
+ DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ vecShift, vecShift, replicate));
+ }
+
+ return retval;
+}
+
+static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+ SDValue VecOp = Op.getOperand(0);
+ SDValue ValOp = Op.getOperand(1);
+ SDValue IdxOp = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+ EVT VT = Op.getValueType();
+ EVT eltVT = ValOp.getValueType();
+
+  // Use 0 when the lane to insert into is 'undef'.
+ int64_t Offset=0;
+ if (IdxOp.getOpcode() != ISD::UNDEF) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
+ }
+
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ // Use $sp ($1) because it's always 16-byte aligned and it's available:
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(Offset, PtrVT));
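+  // As in LowerVECTOR_SHUFFLE, SHUFFLE_MASK is selected as a c?d instruction,
+  // so only the low bits of this address matter: they pick the byte slot the
+  // new element is inserted into.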
+ // widen the mask when dealing with half vectors
+ EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
+ 128/ VT.getVectorElementType().getSizeInBits());
+ SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
+
+ SDValue result =
+ DAG.getNode(SPUISD::SHUFB, dl, VT,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
+ VecOp,
+ DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
+
+ return result;
+}
+
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
+{
+ SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ DebugLoc dl = Op.getDebugLoc();
+ EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());
+
+ assert(Op.getValueType() == MVT::i8);
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled i8 math operator");
+ case ISD::ADD: {
+ // 8-bit addition: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+
+ }
+
+ case ISD::SUB: {
+ // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
+ // the result:
+ SDValue N1 = Op.getOperand(1);
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::ROTR:
+ case ISD::ROTL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
+ ? ISD::ZERO_EXTEND
+ : ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ // Replicate lower 8-bits into upper 8:
+ SDValue ExpandArg =
+ DAG.getNode(ISD::OR, dl, MVT::i16, N0,
+ DAG.getNode(ISD::SHL, dl, MVT::i16,
+ N0, DAG.getConstant(8, MVT::i32)));
+
+ // Truncate back down to i8
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
+ }
+ case ISD::SRL:
+ case ISD::SHL: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::ZERO_EXTEND;
+
+ if (N1.getValueType().bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::SRA: {
+ SDValue N1 = Op.getOperand(1);
+ EVT N1VT = N1.getValueType();
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ if (!N1VT.bitsEq(ShiftVT)) {
+ unsigned N1Opc = ISD::SIGN_EXTEND;
+
+ if (N1VT.bitsGT(ShiftVT))
+ N1Opc = ISD::TRUNCATE;
+ N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
+ }
+
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ case ISD::MUL: {
+ SDValue N1 = Op.getOperand(1);
+
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
+ DAG.getNode(Opc, dl, MVT::i16, N0, N1));
+ }
+ }
+}
+
+//! Lower byte immediate operations for v16i8 vectors:
+static SDValue
+LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
+ SDValue ConstVec;
+ SDValue Arg;
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+
+ ConstVec = Op.getOperand(0);
+ Arg = Op.getOperand(1);
+ if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ } else {
+ ConstVec = Op.getOperand(1);
+ Arg = Op.getOperand(0);
+ if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
+ ConstVec = ConstVec.getOperand(0);
+ }
+ }
+ }
+
+ if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
+ BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
+ assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
+
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
+
+ if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
+ HasAnyUndefs, minSplatBits)
+ && minSplatBits <= SplatBitSize) {
+ uint64_t SplatBits = APSplatBits.getZExtValue();
+ SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
+
+ SmallVector<SDValue, 16> tcVec;
+ tcVec.assign(16, tc);
+ return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
+ }
+ }
+
+ // These operations (AND, OR, XOR) are legal, they just couldn't be custom
+ // lowered. Return the operation, rather than a null SDValue.
+ return Op;
+}
+
+//! Custom lowering for CTPOP (count population)
+/*!
+ Custom lowering code that counts the number ones in the input
+ operand. SPU has such an instruction, but it counts the number of
+ ones per byte, which then have to be accumulated.
+*/
+static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
+ EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Invalid value type!");
+ case MVT::i8: {
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
+ }
+
+ case MVT::i16: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i16);
+ SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
+ SDValue Shift1 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
+ SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
+
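+    // CNTB produced one ones-count per byte; fold the high byte's count into
+    // the low byte by adding the value to a copy of itself shifted right by 8,
+    // then mask the sum down to the count bits.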
+ return DAG.getNode(ISD::AND, dl, MVT::i16,
+ DAG.getNode(ISD::ADD, dl, MVT::i16,
+ DAG.getNode(ISD::SRL, dl, MVT::i16,
+ Tmp1, Shift1),
+ Tmp1),
+ Mask0);
+ }
+
+ case MVT::i32: {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+
+ SDValue N = Op.getOperand(0);
+ SDValue Elt0 = DAG.getConstant(0, MVT::i32);
+ SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
+ SDValue Shift1 = DAG.getConstant(16, MVT::i32);
+ SDValue Shift2 = DAG.getConstant(8, MVT::i32);
+
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
+ SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
+
+ // CNTB_result becomes the chain to which all of the virtual registers
+ // CNTB_reg, SUM1_reg become associated:
+ SDValue CNTB_result =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
+
+ SDValue CNTB_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
+
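+    // Fold the four per-byte counts into the low byte in two steps: add in
+    // the counts from the upper halfword (shift right 16), then from the
+    // remaining byte (shift right 8), and mask off everything else.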
+ SDValue Comp1 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
+ Shift1);
+
+ SDValue Sum1 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
+ DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
+
+ SDValue Sum1_rescopy =
+ DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
+
+ SDValue Comp2 =
+ DAG.getNode(ISD::SRL, dl, MVT::i32,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
+ Shift2);
+ SDValue Sum2 =
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
+ DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
+
+ return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
+ }
+
+ case MVT::i64:
+ break;
+ }
+
+ return SDValue();
+}
+
+//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
+/*!
+ f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
+ All conversions to i64 are expanded to a libcall.
+ */
+static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
+ || OpVT == MVT::i64) {
+ // Convert f32 / f64 to i32 / i64 via libcall.
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::FP_TO_SINT)
+ ? RTLIB::getFPTOSINT(Op0VT, OpVT)
+ : RTLIB::getFPTOUINT(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
+/*!
+ i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
+ All conversions from i64 are expanded to a libcall.
+ */
+static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetLowering &TLI) {
+ EVT OpVT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
+ || Op0VT == MVT::i64) {
+ // Convert i32, i64 to f64 via libcall:
+ RTLIB::Libcall LC =
+ (Op.getOpcode() == ISD::SINT_TO_FP)
+ ? RTLIB::getSINTTOFP(Op0VT, OpVT)
+ : RTLIB::getUINTTOFP(Op0VT, OpVT);
+    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
+ SDValue Dummy;
+ return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
+ }
+
+ return Op;
+}
+
+//! Lower ISD::SETCC
+/*!
+ This handles MVT::f64 (double floating point) condition lowering
+ */
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
+ DebugLoc dl = Op.getDebugLoc();
+ assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
+
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ EVT lhsVT = lhs.getValueType();
+  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
+
+ EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
+ APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ EVT IntVT(MVT::i64);
+
+ // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
+ // selected to a NOP:
+ SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
+ SDValue lhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64lhs, DAG.getConstant(32, MVT::i32)));
+ SDValue lhsHi32abs =
+ DAG.getNode(ISD::AND, dl, MVT::i32,
+ lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
+ SDValue lhsLo32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
+
+ // SETO and SETUO only use the lhs operand:
+ if (CC->get() == ISD::SETO) {
+ // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
+ // SETUO
+ APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, lhsVT),
+ ISD::SETUO),
+ DAG.getConstant(ccResultAllOnes, ccResultVT));
+ } else if (CC->get() == ISD::SETUO) {
+ // Evaluates to true if Op0 is [SQ]NaN
+ return DAG.getNode(ISD::AND, dl, ccResultVT,
+ DAG.getSetCC(dl, ccResultVT,
+ lhsHi32abs,
+ DAG.getConstant(0x7ff00000, MVT::i32),
+ ISD::SETGE),
+ DAG.getSetCC(dl, ccResultVT,
+ lhsLo32,
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETGT));
+ }
+
+ SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
+ SDValue rhsHi32 =
+ DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
+ DAG.getNode(ISD::SRL, dl, IntVT,
+ i64rhs, DAG.getConstant(32, MVT::i32)));
+
+ // If a value is negative, subtract from the sign magnitude constant:
+ SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
+
+ // Convert the sign-magnitude representation into 2's complement:
+ SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ lhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
+ SDValue lhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ lhsSelectMask, lhsSignMag2TC, i64lhs);
+
+ SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
+ rhsHi32, DAG.getConstant(31, MVT::i32));
+ SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
+ SDValue rhsSelect =
+ DAG.getNode(ISD::SELECT, dl, IntVT,
+ rhsSelectMask, rhsSignMag2TC, i64rhs);
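+  // The SRA-by-31 masks are all ones exactly when the corresponding operand is
+  // negative, so each SELECT picks the converted two's complement form only
+  // for negative inputs.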
+
+ unsigned compareOp;
+
+ switch (CC->get()) {
+ case ISD::SETOEQ:
+ case ISD::SETUEQ:
+ compareOp = ISD::SETEQ; break;
+ case ISD::SETOGT:
+ case ISD::SETUGT:
+ compareOp = ISD::SETGT; break;
+ case ISD::SETOGE:
+ case ISD::SETUGE:
+ compareOp = ISD::SETGE; break;
+ case ISD::SETOLT:
+ case ISD::SETULT:
+ compareOp = ISD::SETLT; break;
+ case ISD::SETOLE:
+ case ISD::SETULE:
+ compareOp = ISD::SETLE; break;
+ case ISD::SETUNE:
+ case ISD::SETONE:
+ compareOp = ISD::SETNE; break;
+ default:
+ report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
+ }
+
+ SDValue result =
+ DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
+ (ISD::CondCode) compareOp);
+
+ if ((CC->get() & 0x8) == 0) {
+ // Ordered comparison:
+ SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
+ lhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
+ rhs, DAG.getConstantFP(0.0, MVT::f64),
+ ISD::SETO);
+ SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
+
+ result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
+ }
+
+ return result;
+}
+
+//! Lower ISD::SELECT_CC
+/*!
+ ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
+ SELB instruction.
+
+ \note Need to revisit this in the future: if the code path through the true
+ and false value computations is longer than the latency of a branch (6
+ cycles), then it would be more advantageous to branch and insert a new basic
+ block and branch on the condition. However, this code does not make that
+ assumption, given the simplistic uses so far.
+ */
+
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue trueval = Op.getOperand(2);
+ SDValue falseval = Op.getOperand(3);
+ SDValue condition = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // NOTE: SELB's arguments: $rA, $rB, $mask
+ //
+  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
+  // where bits in $mask are 1. The compare result used as the mask has 1s
+  // where the condition was true and 0s where it was false. Hence, the
+  // arguments to SELB get reversed.
+
+ // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
+ // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
+ // with another "cannot select select_cc" assert:
+
+ SDValue compare = DAG.getNode(ISD::SETCC, dl,
+ TLI.getSetCCResultType(Op.getValueType()),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
+}
+
+//! Custom lower ISD::TRUNCATE
+static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
+{
+ // Type to truncate to
+ EVT VT = Op.getValueType();
+ MVT simpleVT = VT.getSimpleVT();
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
+ VT, (128 / VT.getSizeInBits()));
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to truncate from
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+
+ if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
+ // Create shuffle mask, least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
+
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
+ Op0, Op0, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
+ }
+
+ return SDValue(); // Leave the truncate unmolested
+}
+
+/*!
+ * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
+ * algorithm is to duplicate the sign bit using rotmai to generate at
+ * least one byte full of sign bits. Then propagate the "sign-byte" into
+ * the leftmost words and the i64/i32 into the rightmost words using shufb.
+ *
+ * @param Op The sext operand
+ * @param DAG The current DAG
+ * @return The SDValue with the entire instruction sequence
+ */
+static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
+{
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Type to extend to
+ MVT OpVT = Op.getValueType().getSimpleVT();
+
+ // Type to extend from
+ SDValue Op0 = Op.getOperand(0);
+ MVT Op0VT = Op0.getValueType().getSimpleVT();
+
+ // extend i8 & i16 via i32
+ if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
+ Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
+ Op0VT = MVT::i32;
+ }
+
+ // The type to extend to needs to be a i128 and
+ // the type to extend from needs to be i64 or i32.
+ assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
+ "LowerSIGN_EXTEND: input and/or output operand have wrong size");
+ (void)OpVT;
+
+ // Create shuffle mask
+ unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
+ unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
+ unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
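+  // Mask bytes 0x10..0x1f select from the second shufb operand (the sign
+  // bytes generated below); 0x00..0x0f select from the first operand (the
+  // original value), placing it in the low quadword bytes.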
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask1, MVT::i32),
+ DAG.getConstant(mask2, MVT::i32),
+ DAG.getConstant(mask3, MVT::i32));
+
+ // Word wise arithmetic right shift to generate at least one byte
+ // that contains sign bits.
+ MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
+ SDValue sraVal = DAG.getNode(ISD::SRA,
+ dl,
+ mvt,
+ DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
+ DAG.getConstant(31, MVT::i32));
+
+ // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
+ SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ dl, Op0VT, Op0,
+ DAG.getTargetConstant(
+ SPU::GPRCRegClass.getID(),
+ MVT::i32)), 0);
+ // Shuffle bytes - Copy the sign bits into the upper 64 bits
+ // and the input value into the lower 64 bits.
+ SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
+ extended, sraVal, shufMask);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
+}
+
+//! Custom (target-specific) lowering entry point
+/*!
+ This is where LLVM's DAG selection process calls to do target-specific
+ lowering of nodes.
+ */
+SDValue
+SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ unsigned Opc = (unsigned) Op.getOpcode();
+ EVT VT = Op.getValueType();
+
+ switch (Opc) {
+ default: {
+#ifndef NDEBUG
+ errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ Op.getNode()->dump();
+#endif
+ llvm_unreachable(0);
+ }
+ case ISD::LOAD:
+ case ISD::EXTLOAD:
+ case ISD::SEXTLOAD:
+ case ISD::ZEXTLOAD:
+ return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::STORE:
+ return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
+ case ISD::ConstantFP:
+ return LowerConstantFP(Op, DAG);
+
+ // i8, i64 math ops:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case ISD::SRL:
+ case ISD::SHL:
+ case ISD::SRA: {
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+ break;
+ }
+
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG, *this);
+
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG, *this);
+
+ // Vector-related lowering.
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR:
+ return LowerSCALAR_TO_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+
+ // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ return LowerByteImmed(Op, DAG);
+
+ // Vector and i8 multiply:
+ case ISD::MUL:
+ if (VT == MVT::i8)
+ return LowerI8Math(Op, DAG, Opc, *this);
+
+ case ISD::CTPOP:
+ return LowerCTPOP(Op, DAG);
+
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG, *this);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG, *this);
+
+ case ISD::TRUNCATE:
+ return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SIGN_EXTEND:
+ return LowerSIGN_EXTEND(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const
+{
+#if 0
+ unsigned Opc = (unsigned) N->getOpcode();
+ EVT OpVT = N->getValueType(0);
+
+ switch (Opc) {
+ default: {
+ errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
+ errs() << "Op.getOpcode() = " << Opc << "\n";
+ errs() << "*Op.getNode():\n";
+ N->dump();
+ abort();
+ /*NOTREACHED*/
+ }
+ }
+#endif
+
+ /* Otherwise, return unchanged */
+}
+
+//===----------------------------------------------------------------------===//
+// Target Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+SDValue
+SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
+{
+#if 0
+ TargetMachine &TM = getTargetMachine();
+#endif
+ const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0); // everything has at least one operand
+ EVT NodeVT = N->getValueType(0); // The node's value type
+ EVT Op0VT = Op0.getValueType(); // The first operand's result
+ SDValue Result; // Initially, empty result
+ DebugLoc dl = N->getDebugLoc();
+
+ switch (N->getOpcode()) {
+ default: break;
+ case ISD::ADD: {
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+ ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+ ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ IndirectArg, combinedValue);
+ }
+ }
+ }
+ break;
+ }
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND: {
+ if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
+ // (any_extend (SPUextract_elt0 <arg>)) ->
+ // (SPUextract_elt0 <arg>)
+ // Types must match, however...
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\nReplace: ";
+ N->dump(&DAG);
+ errs() << "\nWith: ";
+ Op0.getNode()->dump(&DAG);
+ errs() << "\n";
+ }
+#endif
+
+ return Op0;
+ }
+ break;
+ }
+ case SPUISD::IndirectAddr: {
+ if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
+ ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CN != 0 && CN->isNullValue()) {
+ // (SPUindirect (SPUaform <addr>, 0), 0) ->
+ // (SPUaform <addr>, 0)
+
+ DEBUG(errs() << "Replace: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Op0.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+
+ return Op0;
+ }
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ errs() << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
+ }
+ break;
+ }
+ case SPUISD::SHL_BITS:
+ case SPUISD::SHL_BYTES:
+ case SPUISD::ROTBYTES_LEFT: {
+ SDValue Op1 = N->getOperand(1);
+
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
+ Result = Op0;
+ }
+ }
+ break;
+ }
+ case SPUISD::PREFSLOT2VEC: {
+ switch (Op0.getOpcode()) {
+ default:
+ break;
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
+ // <arg>
+ // but only if the SPUprefslot2vec and <arg> types match.
+ SDValue Op00 = Op0.getOperand(0);
+ if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
+ SDValue Op000 = Op00.getOperand(0);
+ if (Op000.getValueType() == NodeVT) {
+ Result = Op000;
+ }
+ }
+ break;
+ }
+ case SPUISD::VEC2PREFSLOT: {
+ // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
+ // <arg>
+ Result = Op0.getOperand(0);
+ break;
+ }
+ }
+ break;
+ }
+ }
+
+ // Otherwise, return unchanged.
+#ifndef NDEBUG
+ if (Result.getNode()) {
+ DEBUG(errs() << "\nReplace.SPU: ");
+ DEBUG(N->dump(&DAG));
+ DEBUG(errs() << "\nWith: ");
+ DEBUG(Result.getNode()->dump(&DAG));
+ DEBUG(errs() << "\n");
+ }
+#endif
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+SPUTargetLowering::ConstraintType
+SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
+ if (ConstraintLetter.size() == 1) {
+ switch (ConstraintLetter[0]) {
+ default: break;
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'v':
+ case 'y':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(ConstraintLetter);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+SPUTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ //FIXME: Seems like the supported constraint letters were just copied
+ // from PPC, as the following doesn't correspond to the GCC docs.
+ // I'm leaving it so until someone adds the corresponding lowering support.
+ case 'b':
+ case 'r':
+ case 'f':
+ case 'd':
+ case 'v':
+ case 'y':
+ weight = CW_Register;
+ break;
+ }
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const
+{
+ if (Constraint.size() == 1) {
+ // GCC RS6000 Constraint Letters
+ switch (Constraint[0]) {
+ case 'b': // R1-R31
+ case 'r': // R0-R31
+ if (VT == MVT::i64)
+ return std::make_pair(0U, SPU::R64CRegisterClass);
+ return std::make_pair(0U, SPU::R32CRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, SPU::R32FPRegisterClass);
+ else if (VT == MVT::f64)
+ return std::make_pair(0U, SPU::R64FPRegisterClass);
+ break;
+ case 'v':
+ return std::make_pair(0U, SPU::GPRCRegisterClass);
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+//! Compute used/known bits for a SPU operand
+void
+SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth ) const {
+#if 0
+ const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
+
+ switch (Op.getOpcode()) {
+ default:
+ // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+ break;
+ case CALL:
+ case SHUFB:
+ case SHUFFLE_MASK:
+ case CNTB:
+ case SPUISD::PREFSLOT2VEC:
+ case SPUISD::LDRESULT:
+ case SPUISD::VEC2PREFSLOT:
+ case SPUISD::SHLQUAD_L_BITS:
+ case SPUISD::SHLQUAD_L_BYTES:
+ case SPUISD::VEC_ROTL:
+ case SPUISD::VEC_ROTR:
+ case SPUISD::ROTBYTES_LEFT:
+ case SPUISD::SELECT_MASK:
+ case SPUISD::SELB:
+ }
+#endif
+}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+
+ case ISD::SETCC: {
+ EVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
+ return VT.getSizeInBits();
+ }
+ }
+}
+
+// LowerAsmOperandForConstraint
+void
+SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ // Default, for the time being, to the base class handler
+ TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+/// isLegalAddressImmediate - Return true if the integer value can be used
+/// as the offset of the target addressing mode.
+bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
+ Type *Ty) const {
+ // SPU's addresses are 256K:
+ return (V > -(1 << 18) && V < (1 << 18) - 1);
+}
+
+bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
+ return false;
+}
+
+bool
+SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The SPU target isn't yet aware of offsets.
+ return false;
+}
+
+// can we compare to Imm without writing it into a register?
+bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ //ceqi, cgti, etc. all take s10 operand
+ return isInt<10>(Imm);
+}
+
+bool
+SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type * ) const{
+
+  // A-form: 18-bit absolute address.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
+ return true;
+
+  // D-form: reg + 14-bit offset
+ if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
+ return true;
+
+ // X-form: reg+reg
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
+ return true;
+
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
new file mode 100644
index 000000000000..e3db7b2f1fbc
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h
@@ -0,0 +1,185 @@
+//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that Cell SPU uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_ISELLOWERING_H
+#define SPU_ISELLOWERING_H
+
+#include "SPU.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+
+namespace llvm {
+ namespace SPUISD {
+ enum NodeType {
+      // Start the numbering where the built-in ops and target ops leave off.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Pseudo instructions:
+ RET_FLAG, ///< Return with flag, matched by bi instruction
+
+ Hi, ///< High address component (upper 16)
+ Lo, ///< Low address component (lower 16)
+ PCRelAddr, ///< Program counter relative address
+ AFormAddr, ///< A-form address (local store)
+ IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)"
+
+ LDRESULT, ///< Load result (value, chain)
+ CALL, ///< CALL instruction
+ SHUFB, ///< Vector shuffle (permute)
+ SHUFFLE_MASK, ///< Shuffle mask
+      CNTB,                 ///< Count ones in bytes (per-byte population count)
+ PREFSLOT2VEC, ///< Promote scalar->vector
+ VEC2PREFSLOT, ///< Extract element 0
+ SHL_BITS, ///< Shift quad left, by bits
+ SHL_BYTES, ///< Shift quad left, by bytes
+ SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros.
+ VEC_ROTL, ///< Vector rotate left
+ VEC_ROTR, ///< Vector rotate right
+ ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI)
+ ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count
+ SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI)
+ SELB, ///< Select bits -> (b & mask) | (a & ~mask)
+ // Markers: These aren't used to generate target-dependent nodes, but
+ // are used during instruction selection.
+ ADD64_MARKER, ///< i64 addition marker
+ SUB64_MARKER, ///< i64 subtraction marker
+ MUL64_MARKER, ///< i64 multiply marker
+ LAST_SPUISD ///< Last user-defined instruction
+ };
+ }
+
+ //! Utility functions specific to CellSPU:
+ namespace SPU {
+ SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
+ EVT ValueType);
+ SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG);
+ SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG);
+
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG,
+ const SPUTargetMachine &TM);
+    //! Simplify an EVT::v2i64 constant splat to CellSPU-ready form
+ SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat,
+ DebugLoc dl);
+ }
+
+  class SPUTargetMachine;            // forward declaration.
+
+ class SPUTargetLowering :
+ public TargetLowering
+ {
+ int VarArgsFrameIndex; // FrameIndex for start of varargs area.
+ SPUTargetMachine &SPUTM;
+
+ public:
+ //! The venerable constructor
+ /*!
+ This is where the CellSPU backend sets operation handling (i.e., legal,
+      custom, expand, or promote).
+ */
+ SPUTargetLowering(SPUTargetMachine &TM);
+
+ //! Get the target machine
+ SPUTargetMachine &getSPUTargetMachine() {
+ return SPUTM;
+ }
+
+ /// getTargetNodeName() - This method returns the name of a target specific
+ /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - Return the ValueType for ISD::SETCC
+ virtual EVT getSetCCResultType(EVT VT) const;
+
+ virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+
+ //! Custom lowering hooks
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+ //! Custom lowering hook for nodes with illegal result types.
+ virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const;
+
+ virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth = 0) const;
+
+ ConstraintType getConstraintType(const std::string &ConstraintLetter) const;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const;
+
+ /// isLegalAddressImmediate - Return true if the integer value can be used
+ /// as the offset of the target addressing mode.
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
+ virtual bool isLegalAddressImmediate(GlobalValue *) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool doesNotRet, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const;
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG) const;
+
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+ virtual bool isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
new file mode 100644
index 000000000000..b495537fc2c8
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h
@@ -0,0 +1,43 @@
+//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes functions that may be used with BuildMI from the
+// MachineInstrBuilder.h file to simplify generating frame and constant pool
+// references.
+//
+// For reference, the order of operands for memory references is:
+// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate
+// Displacement.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRBUILDER_H
+#define SPU_INSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+namespace llvm {
+
+/// addFrameReference - This function is used to add a reference to the base of
+/// an abstract object on the stack frame of the current function. The
+/// reference uses the frame index as a stand-in for the base register until
+/// it is resolved. This allows a constant offset to be specified as well.
+///
+inline const MachineInstrBuilder&
+addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0,
+ bool mem = true) {
+ if (mem)
+ return MIB.addImm(Offset).addFrameIndex(FI);
+ else
+ return MIB.addFrameIndex(FI).addImm(Offset);
+}
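+
+// Usage sketch (the opcode and operand names are illustrative, not
+// prescriptive): a quadword store to a frame slot is built with the
+// immediate-first operand order described above, e.g.
+//
+//   addFrameReference(BuildMI(MBB, MI, DL, TII.get(SPU::STQDr32))
+//                       .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);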
+
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
new file mode 100644
index 000000000000..cd3f42214345
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td
@@ -0,0 +1,320 @@
+//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// Cell SPU instruction formats. Note that these are notationally similar to
+// PowerPC, like "A-Form". But the sizes of operands and fields differ.
+
+// This was kiped from the PPC instruction formats (seemed like a good idea...)
+
+class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin>
+ : Instruction {
+ field bits<32> Inst;
+
+ let Namespace = "SPU";
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Itinerary = itin;
+}
+
+// RR Format
+class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin> {
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
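+
+// For illustration (register numbers are arbitrary): "a $3, $4, $5" uses the
+// 11-bit opcode 0b00000011000 with RB = 5, RA = 4 and RT = 3 packed into the
+// fields above.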
+
+let RB = 0 in {
+  // RR Format, where RB is zeroed (don't care):
+ class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ let RA = 0 in {
+    // RR Format, where RA and RB are zeroed (don't care):
+ // Used for reads from status control registers (see FPSCRRr32)
+ class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+ }
+}
+
+let RT = 0 in {
+ // RR Format, where RT is zeroed (don't care), or as the instruction handbook
+ // says, "RT is a false target." Used in "Halt if" instructions
+ class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RRR Format
+class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RB;
+ bits<7> RC;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-3} = opcode;
+ let Inst{4-10} = RT;
+ let Inst{11-17} = RB;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RC;
+}
+
+// RI7 Format
+class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> i7;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = opcode;
+ let Inst{11-17} = i7;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// CVTIntFp Format
+class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-9} = opcode;
+ let Inst{10-17} = 0;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+let RA = 0 in {
+ class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ let RT = 0 in {
+ // Branch instruction format (without D/E flag settings)
+ class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RRForm<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+
+ class BIForm<bits<11> opcode, string asmstr, list<dag> pattern>
+ : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv,
+ pattern>
+ { }
+
+ let RB = 0 in {
+ // Return instruction (bi, branch indirect), RA is zero (LR):
+ class RETForm<string asmstr, list<dag> pattern>
+ : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv,
+ pattern>
+ { }
+ }
+ }
+}
+
+// Branch indirect external data forms:
+class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern>
+ : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv>
+{
+ bits<7> Rcalldest;
+
+ let Pattern = pattern;
+
+ let Inst{0-10} = 0b11010101100;
+ let Inst{11} = 0;
+ let Inst{12-13} = DE_flag;
+ let Inst{14-17} = 0b0000;
+ let Inst{18-24} = Rcalldest;
+ let Inst{25-31} = 0b0000000;
+}
+
+// RI10 Format
+class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<10> i10;
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-7} = opcode;
+ let Inst{8-17} = i10;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+}
+
+// RI10 Format, where the constant is zero (or effectively ignored by the
+// SPU)
+let i10 = 0 in {
+ class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI10 Format, where RT is ignored.
+// This format is used primarily by the Halt If ... Immediate set of
+// instructions
+let RT = 0 in {
+ class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern>
+ { }
+}
+
+// RI16 Format
+class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<16> i16;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-8} = opcode;
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RT;
+}
+
+// Specialized version of the RI16 Format for unconditional branch relative and
+// branch absolute, branch and set link. Note that for branch and set link, the
+// link register doesn't have to be $lr, but this is actually hard coded into
+// the instruction pattern.
+
+let RT = 0 in {
+ class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+
+ class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern>
+ : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern>
+ { }
+}
+
+//===----------------------------------------------------------------------===//
+// RI18 Format:
+//===----------------------------------------------------------------------===//
+
+class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, itin>
+{
+ bits<18> i18;
+ bits<7> RT;
+
+ let Pattern = pattern;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-24} = i18;
+ let Inst{25-31} = RT;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction formats for intrinsics:
+//===----------------------------------------------------------------------===//
+
+// RI10 Format for v8i16 intrinsics
+class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ i16ImmSExt10:$val))] >;
+
+class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA),
+ !strconcat(opc, " $rT, $rA, $val"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ i32ImmSExt10:$val))] >;
+
+// RR Format for v8i16 intrinsics
+class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))] >;
+
+// RR Format for v4i32 intrinsics
+class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin,
+ Intrinsic IntID> :
+ RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ !strconcat(opc, " $rT, $rA, $rB"), itin,
+ [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))] >;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions, like call frames:
+//===----------------------------------------------------------------------===//
+
+class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern>
+ : SPUInstr<OOL, IOL, asmstr, NoItinerary> {
+ let OutOperandList = OOL;
+ let InOperandList = IOL;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Inst{31-0} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch hint formats
+//===----------------------------------------------------------------------===//
+// For hbrr and hbra
+class HBI16Form<bits<7> opcode, dag IOL, string asmstr>
+ : Instruction {
+ field bits<32> Inst;
+  bits<16> i16;
+  bits<9> RO;
+
+ let Namespace = "SPU";
+ let InOperandList = IOL;
+ let OutOperandList = (outs); //no output
+ let AsmString = asmstr;
+ let Itinerary = BranchHints;
+
+ let Inst{0-6} = opcode;
+ let Inst{7-8} = RO{8-7};
+ let Inst{9-24} = i16;
+ let Inst{25-31} = RO{6-0};
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
new file mode 100644
index 000000000000..759923d7bb42
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -0,0 +1,453 @@
+//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUInstrInfo.h"
+#include "SPUInstrBuilder.h"
+#include "SPUTargetMachine.h"
+#include "SPUHazardRecognizers.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SPUGenInstrInfo.inc"
+
+using namespace llvm;
+
+namespace {
+ //! Predicate for an unconditional branch instruction
+ inline bool isUncondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BR
+ || opc == SPU::BRA
+ || opc == SPU::BI);
+ }
+
+ //! Predicate for a conditional branch instruction
+ inline bool isCondBranch(const MachineInstr *I) {
+ unsigned opc = I->getOpcode();
+
+ return (opc == SPU::BRNZr32
+ || opc == SPU::BRNZv4i32
+ || opc == SPU::BRZr32
+ || opc == SPU::BRZv4i32
+ || opc == SPU::BRHNZr16
+ || opc == SPU::BRHNZv8i16
+ || opc == SPU::BRHZr16
+ || opc == SPU::BRHZv8i16);
+ }
+}
+
+SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
+ : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
+ TM(tm),
+ RI(*TM.getSubtargetImpl(), *this)
+{ /* NOP */ }
+
+/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
+/// this target when scheduling the DAG.
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer(
+ const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ assert(TII && "No InstrInfo?");
+ return new SPUHazardRecognizer(*TII);
+}
+
+unsigned
+SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::LQDv16i8:
+ case SPU::LQDv8i16:
+ case SPU::LQDv4i32:
+ case SPU::LQDv4f32:
+ case SPU::LQDv2f64:
+ case SPU::LQDr128:
+ case SPU::LQDr64:
+ case SPU::LQDr32:
+ case SPU::LQDr16: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+unsigned
+SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case SPU::STQDv16i8:
+ case SPU::STQDv8i16:
+ case SPU::STQDv4i32:
+ case SPU::STQDv4f32:
+ case SPU::STQDv2f64:
+ case SPU::STQDr128:
+ case SPU::STQDr64:
+ case SPU::STQDr32:
+ case SPU::STQDr16:
+ case SPU::STQDr8: {
+ const MachineOperand MOp1 = MI->getOperand(1);
+ const MachineOperand MOp2 = MI->getOperand(2);
+ if (MOp1.isImm() && MOp2.isFI()) {
+ FrameIndex = MOp2.getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+ }
+ return 0;
+}
+
+void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
+{
+  // We support cross-register-class moves for our aliases, such as R3 in any
+  // register class to any other register class containing R3. This is
+  // required because, for example, bitconvert i64 -> f64 is selected as a
+  // no-op, so our register classes carry no type-specific meaning.
+
+ BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+}
+
+void
+SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::STQDv16i8 : SPU::STQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc))
+ .addReg(SrcReg, getKillRegState(isKill)), FrameIdx);
+}
+
+void
+SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
+{
+ unsigned opc;
+ bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset());
+ if (RC == SPU::GPRCRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128);
+ } else if (RC == SPU::R64CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R64FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64);
+ } else if (RC == SPU::R32CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R32FPRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32);
+ } else if (RC == SPU::R16CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16);
+ } else if (RC == SPU::R8CRegisterClass) {
+ opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8);
+ } else if (RC == SPU::VECREGRegisterClass) {
+ opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8;
+ } else {
+ llvm_unreachable("Unknown regclass in loadRegFromStackSlot!");
+ }
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
+}
+
+//! Branch analysis
+/*!
+ \note This code was kiped from PPC. There may be more branch analysis for
+ CellSPU than what's currently done here.
+ */
+bool
+SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ // If the block has no terminators, it just falls into the block after it.
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranch(LastInst)) {
+ // Check for jump tables
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else if (isCondBranch(LastInst)) {
+ // Block ends with fall-through condbranch.
+ TBB = LastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing LastInst: ");
+ DEBUG(LastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ return false;
+ }
+ // Otherwise, don't know what this is.
+ return true;
+ }
+
+ // Get the instruction before it if it's a terminator.
+ MachineInstr *SecondLastInst = I;
+
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() &&
+ isUnpredicatedTerminator(--I))
+ return true;
+
+ // If the block ends with a conditional and unconditional branch, handle it.
+ if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(1).getMBB();
+ DEBUG(errs() << "Pushing SecondLastInst: ");
+ DEBUG(SecondLastInst->dump());
+ Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode()));
+ Cond.push_back(SecondLastInst->getOperand(0));
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed, so remove it.
+ if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return false;
+ }
+
+ // Otherwise, can't handle this.
+ return true;
+}
+
+// Search MBB for branch hint labels and branch hint ops.
+static void removeHBR( MachineBasicBlock &MBB) {
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+    if (I->getOpcode() == SPU::HBRA ||
+        I->getOpcode() == SPU::HBR_LABEL) {
+      I = MBB.erase(I);
+ if (I == MBB.end())
+ break;
+ }
+ }
+}
+
+unsigned
+SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ removeHBR(MBB);
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isCondBranch(I) && !isUncondBranch(I))
+ return 0;
+
+ // Remove the first branch.
+ DEBUG(errs() << "Removing branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ I = MBB.end();
+ if (I == MBB.begin())
+ return 1;
+
+ --I;
+ if (!(isCondBranch(I) || isUncondBranch(I)))
+ return 1;
+
+ // Remove the second branch.
+ DEBUG(errs() << "Removing second branch: ");
+ DEBUG(I->dump());
+ I->eraseFromParent();
+ return 2;
+}
+
+/** Find the optimal position for a hint branch instruction in a basic block.
+ * This should take into account:
+ *  - the branch hint delays
+ *  - congestion of the memory bus
+ *  - dual-issue scheduling (i.e. avoid insertion of nops)
+ * Current implementation is rather simplistic.
+ */
+static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB)
+{
+ MachineBasicBlock::iterator J = MBB.end();
+  for (int i = 0; i < 8; i++) {
+    if (J == MBB.begin())
+      return J;
+    --J;
+ }
+ return J;
+}
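+
+// Note: the loop above backs up at most eight instructions from the end of
+// the block, which matches the haveHBR = MBB.size() > 8 heuristic used in
+// InsertBranch below.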
+
+unsigned
+SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "SPU branch conditions have two components!");
+
+ MachineInstrBuilder MIB;
+  // TODO: make a more accurate algorithm.
+  bool haveHBR = MBB.size() > 8;
+
+ removeHBR(MBB);
+ MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol();
+ // Add a label just before the branch
+ if (haveHBR)
+ MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel);
+
+ // One-way branch.
+ if (FBB == 0) {
+ if (Cond.empty()) {
+ // Unconditional branch
+ MIB = BuildMI(&MBB, DL, get(SPU::BR));
+ MIB.addMBB(TBB);
+
+ DEBUG(errs() << "Inserted one-way uncond branch: ");
+ DEBUG((*MIB).dump());
+
+      // Basic blocks have just one branch, so it is safe to add the hint at
+      // its end.
+      if (haveHBR) {
+        MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+ } else {
+ // Conditional branch
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+
+ if (haveHBR) {
+ MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(TBB);
+ }
+
+ DEBUG(errs() << "Inserted one-way cond branch: ");
+ DEBUG((*MIB).dump());
+ }
+ return 1;
+ } else {
+ MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
+
+ // Two-way Conditional Branch.
+ MIB.addReg(Cond[1].getReg()).addMBB(TBB);
+ MIB2.addMBB(FBB);
+
+ if (haveHBR) {
+      MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA));
+ MIB.addSym(branchLabel);
+ MIB.addMBB(FBB);
+ }
+
+ DEBUG(errs() << "Inserted conditional branch: ");
+ DEBUG((*MIB).dump());
+ DEBUG(errs() << "part 2: ");
+ DEBUG((*MIB2).dump());
+ return 2;
+ }
+}
+
+//! Reverses a branch's condition, returning false on success.
+bool
+SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ const {
+ // Pretty brainless way of inverting the condition, but it works, considering
+ // there are only two conditions...
+ static struct {
+ unsigned Opc; //! The incoming opcode
+ unsigned RevCondOpc; //! The reversed condition opcode
+ } revconds[] = {
+ { SPU::BRNZr32, SPU::BRZr32 },
+ { SPU::BRNZv4i32, SPU::BRZv4i32 },
+ { SPU::BRZr32, SPU::BRNZr32 },
+ { SPU::BRZv4i32, SPU::BRNZv4i32 },
+ { SPU::BRHNZr16, SPU::BRHZr16 },
+ { SPU::BRHNZv8i16, SPU::BRHZv8i16 },
+ { SPU::BRHZr16, SPU::BRHNZr16 },
+ { SPU::BRHZv8i16, SPU::BRHNZv8i16 }
+ };
+
+ unsigned Opc = unsigned(Cond[0].getImm());
+ // Pretty dull mapping between the two conditions that SPU can generate:
+ for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) {
+ if (revconds[i].Opc == Opc) {
+ Cond[0].setImm(revconds[i].RevCondOpc);
+ return false;
+ }
+ }
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
new file mode 100644
index 000000000000..85e5821aefa1
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h
@@ -0,0 +1,84 @@
+//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the CellSPU implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_INSTRUCTIONINFO_H
+#define SPU_INSTRUCTIONINFO_H
+
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "SPUGenInstrInfo.inc"
+
+namespace llvm {
+ //! Cell SPU instruction information class
+ class SPUInstrInfo : public SPUGenInstrInfo {
+ SPUTargetMachine &TM;
+ const SPURegisterInfo RI;
+ public:
+ explicit SPUInstrInfo(SPUTargetMachine &tm);
+
+ /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const SPURegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
+
+ //! Store a register to a stack slot, based on its register class.
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Load a register from a stack slot, based on its register class.
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
+ //! Reverses a branch's condition, returning false on success.
+ virtual
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+ virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
+ virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ };
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
new file mode 100644
index 000000000000..f76ebd75bfef
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td
@@ -0,0 +1,4484 @@
+//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instructions:
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TODO Items (not urgent today, but would be nice, low priority)
+//
+// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
+// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
+// in 16-bit and 32-bit constants and reduce instruction count.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions:
+//===----------------------------------------------------------------------===//
+
+let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKDOWN",
+ [(callseq_start timm:$amt)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt),
+ "${:comment} ADJCALLSTACKUP",
+ [(callseq_end timm:$amt)]>;
+ def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ),
+ "$targ:\t${:comment}branch hint target",[ ]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Loads:
+// NB: The ordering is actually important, since instruction selection will try
+// each of the instructions in sequence, i.e., the D-form first with the 10-bit
+// displacement, then the A-form with the 16-bit displacement, and finally the
+// X-form with register-register addressing.
+//===----------------------------------------------------------------------===//
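+
+// As a rough sketch of the three forms (operand values are illustrative):
+//   lqd  $rT, 16($rA)     ; D-form: register plus 10-bit displacement
+//   lqa  $rT, label       ; A-form: 16-bit absolute (local store) address
+//   lqx  $rT, $rA, $rB    ; X-form: register plus register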
+
+let canFoldAsLoad = 1 in {
+ class LoadDFormVec<ValueType vectype>
+ : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load dform_addr:$src))]>
+ { }
+
+ class LoadDForm<RegisterClass rclass>
+ : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src),
+ "lqd\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load dform_addr:$src))]>
+ { }
+
+ multiclass LoadDForms
+ {
+ def v16i8: LoadDFormVec<v16i8>;
+ def v8i16: LoadDFormVec<v8i16>;
+ def v4i32: LoadDFormVec<v4i32>;
+ def v2i64: LoadDFormVec<v2i64>;
+ def v4f32: LoadDFormVec<v4f32>;
+ def v2f64: LoadDFormVec<v2f64>;
+
+ def r128: LoadDForm<GPRC>;
+ def r64: LoadDForm<R64C>;
+ def r32: LoadDForm<R32C>;
+ def f32: LoadDForm<R32FP>;
+ def f64: LoadDForm<R64FP>;
+ def r16: LoadDForm<R16C>;
+ def r8: LoadDForm<R8C>;
+ }
+
+ class LoadAFormVec<ValueType vectype>
+ : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load aform_addr:$src))]>
+ { }
+
+ class LoadAForm<RegisterClass rclass>
+ : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src),
+ "lqa\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load aform_addr:$src))]>
+ { }
+
+ multiclass LoadAForms
+ {
+ def v16i8: LoadAFormVec<v16i8>;
+ def v8i16: LoadAFormVec<v8i16>;
+ def v4i32: LoadAFormVec<v4i32>;
+ def v2i64: LoadAFormVec<v2i64>;
+ def v4f32: LoadAFormVec<v4f32>;
+ def v2f64: LoadAFormVec<v2f64>;
+
+ def r128: LoadAForm<GPRC>;
+ def r64: LoadAForm<R64C>;
+ def r32: LoadAForm<R32C>;
+ def f32: LoadAForm<R32FP>;
+ def f64: LoadAForm<R64FP>;
+ def r16: LoadAForm<R16C>;
+ def r8: LoadAForm<R8C>;
+ }
+
+ class LoadXFormVec<ValueType vectype>
+ : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set (vectype VECREG:$rT), (load xform_addr:$src))]>
+ { }
+
+ class LoadXForm<RegisterClass rclass>
+ : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src),
+ "lqx\t$rT, $src",
+ LoadStore,
+ [(set rclass:$rT, (load xform_addr:$src))]>
+ { }
+
+ multiclass LoadXForms
+ {
+ def v16i8: LoadXFormVec<v16i8>;
+ def v8i16: LoadXFormVec<v8i16>;
+ def v4i32: LoadXFormVec<v4i32>;
+ def v2i64: LoadXFormVec<v2i64>;
+ def v4f32: LoadXFormVec<v4f32>;
+ def v2f64: LoadXFormVec<v2f64>;
+
+ def r128: LoadXForm<GPRC>;
+ def r64: LoadXForm<R64C>;
+ def r32: LoadXForm<R32C>;
+ def f32: LoadXForm<R32FP>;
+ def f64: LoadXForm<R64FP>;
+ def r16: LoadXForm<R16C>;
+ def r8: LoadXForm<R8C>;
+ }
+
+ defm LQA : LoadAForms;
+ defm LQD : LoadDForms;
+ defm LQX : LoadXForms;
+
+/* Load quadword, PC relative: Not much use at this point in time.
+ Might be of use later for relocatable code. It's effectively the
+ same as LQA, but uses PC-relative addressing.
+ def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp),
+ "lqr\t$rT, $disp", LoadStore,
+ [(set VECREG:$rT, (load iaddr:$disp))]>;
+ */
+}
+
+//===----------------------------------------------------------------------===//
+// Stores:
+//===----------------------------------------------------------------------===//
+class StoreDFormVec<ValueType vectype>
+ : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), dform_addr:$src)]>
+{ }
+
+class StoreDForm<RegisterClass rclass>
+ : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src),
+ "stqd\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, dform_addr:$src)]>
+{ }
+
+multiclass StoreDForms
+{
+ def v16i8: StoreDFormVec<v16i8>;
+ def v8i16: StoreDFormVec<v8i16>;
+ def v4i32: StoreDFormVec<v4i32>;
+ def v2i64: StoreDFormVec<v2i64>;
+ def v4f32: StoreDFormVec<v4f32>;
+ def v2f64: StoreDFormVec<v2f64>;
+
+ def r128: StoreDForm<GPRC>;
+ def r64: StoreDForm<R64C>;
+ def r32: StoreDForm<R32C>;
+ def f32: StoreDForm<R32FP>;
+ def f64: StoreDForm<R64FP>;
+ def r16: StoreDForm<R16C>;
+ def r8: StoreDForm<R8C>;
+}
+
+class StoreAFormVec<ValueType vectype>
+ : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), aform_addr:$src)]>;
+
+class StoreAForm<RegisterClass rclass>
+ : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src),
+ "stqa\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, aform_addr:$src)]>;
+
+multiclass StoreAForms
+{
+ def v16i8: StoreAFormVec<v16i8>;
+ def v8i16: StoreAFormVec<v8i16>;
+ def v4i32: StoreAFormVec<v4i32>;
+ def v2i64: StoreAFormVec<v2i64>;
+ def v4f32: StoreAFormVec<v4f32>;
+ def v2f64: StoreAFormVec<v2f64>;
+
+ def r128: StoreAForm<GPRC>;
+ def r64: StoreAForm<R64C>;
+ def r32: StoreAForm<R32C>;
+ def f32: StoreAForm<R32FP>;
+ def f64: StoreAForm<R64FP>;
+ def r16: StoreAForm<R16C>;
+ def r8: StoreAForm<R8C>;
+}
+
+class StoreXFormVec<ValueType vectype>
+ : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store (vectype VECREG:$rT), xform_addr:$src)]>
+{ }
+
+class StoreXForm<RegisterClass rclass>
+ : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src),
+ "stqx\t$rT, $src",
+ LoadStore,
+ [(store rclass:$rT, xform_addr:$src)]>
+{ }
+
+multiclass StoreXForms
+{
+ def v16i8: StoreXFormVec<v16i8>;
+ def v8i16: StoreXFormVec<v8i16>;
+ def v4i32: StoreXFormVec<v4i32>;
+ def v2i64: StoreXFormVec<v2i64>;
+ def v4f32: StoreXFormVec<v4f32>;
+ def v2f64: StoreXFormVec<v2f64>;
+
+ def r128: StoreXForm<GPRC>;
+ def r64: StoreXForm<R64C>;
+ def r32: StoreXForm<R32C>;
+ def f32: StoreXForm<R32FP>;
+ def f64: StoreXForm<R64FP>;
+ def r16: StoreXForm<R16C>;
+ def r8: StoreXForm<R8C>;
+}
+
+defm STQD : StoreDForms;
+defm STQA : StoreAForms;
+defm STQX : StoreXForms;
+
+/* Store quadword, PC relative: Not much use at this point in time. Might
+ be useful for relocatable code.
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp),
+ "stqr\t$rT, $disp", LoadStore,
+ [(store VECREG:$rT, iaddr:$disp)]>;
+*/
+
+//===----------------------------------------------------------------------===//
+// Generate Controls for Insertion:
+//===----------------------------------------------------------------------===//
+
+def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cbd\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cbx\t$rT, $src", ShuffleOp,
+ [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "chd\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "chx\t$rT, $src", ShuffleOp,
+ [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cwd\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cwx\t$rT, $src", ShuffleOp,
+ [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src),
+ "cdd\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>;
+
+def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src),
+ "cdx\t$rT, $src", ShuffleOp,
+ [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>;
+
+//===----------------------------------------------------------------------===//
+// Constant formation:
+//===----------------------------------------------------------------------===//
+
+def ILHv8i16:
+ RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>;
+
+def ILHr16:
+ RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R16C:$rT, immSExt16:$val)]>;
+
+// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with
+// the right constant")
+def ILHr8:
+ RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val),
+ "ilh\t$rT, $val", ImmLoad,
+ [(set R8C:$rT, immSExt8:$val)]>;
+
+// IL does sign extension!
+
+class ILInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmediateLoad
+{
+ def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>;
+ def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>;
+
+ // TODO: Need v2f64, v4f32
+
+ def r64: ILRegInst<R64C, s16imm_i64, immSExt16>;
+ def r32: ILRegInst<R32C, s16imm_i32, immSExt16>;
+ def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>;
+ def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>;
+}
+
+defm IL : ImmediateLoad;
+
+class ILHUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val",
+ ImmLoad, pattern>;
+
+class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILHUInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadHalfwordUpper
+{
+ def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>;
+ def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>;
+
+ def r64: ILHURegInst<R64C, u16imm_i64, hi16>;
+ def r32: ILHURegInst<R32C, u16imm_i32, hi16>;
+
+ // Loads the high portion of an address
+ def hi: ILHURegInst<R32C, symbolHi, hi16>;
+
+ // Used in custom lowering constant SFP loads:
+ def f32: ILHURegInst<R32FP, f16imm, hi16_f32>;
+}
+
+defm ILHU : ImmLoadHalfwordUpper;
+
+// Immediate load address (can also be used to load 18-bit unsigned constants,
+// see the zext 16->32 pattern)
+
+class ILAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val",
+ LoadNOP, pattern>;
+
+class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs VECREG:$rT), (ins immtype:$val),
+ [(set (vectype VECREG:$rT), (vectype xform:$val))]>;
+
+class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>:
+ ILAInst<(outs rclass:$rT), (ins immtype:$val),
+ [(set rclass:$rT, xform:$val)]>;
+
+multiclass ImmLoadAddress
+{
+ def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>;
+ def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>;
+
+ def r64: ILARegInst<R64C, u18imm_i64, imm18>;
+ def r32: ILARegInst<R32C, u18imm, imm18>;
+ def f32: ILARegInst<R32FP, f18imm, fpimm18>;
+ def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>;
+
+ def hi: ILARegInst<R32C, symbolHi, imm18>;
+ def lo: ILARegInst<R32C, symbolLo, imm18>;
+
+ def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val),
+ [(set R32C:$rT, imm18:$val)]>;
+}
+
+defm ILA : ImmLoadAddress;
+
+// Immediate OR, Halfword Lower: The "other" part of loading large constants
+// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...>
+// Note that these are really two-operand instructions, but they're encoded
+// as three operands with the first two arguments tied to each other.
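+//
+// For example (a sketch; the value and register are arbitrary): materializing
+// 0x12345678 in $r3 becomes
+//   ilhu $r3, 0x1234      ; load the upper halfword
+//   iohl $r3, 0x5678      ; OR in the lower halfword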
+
+class IOHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val",
+ ImmLoad, pattern>,
+ RegConstraint<"$rS = $rT">,
+ NoEncode<"$rS">;
+
+class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>:
+ IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val),
+ [/* no pattern */]>;
+
+multiclass ImmOrHalfwordLower
+{
+ def v2i64: IOHLVecInst<v2i64, u16imm_i64>;
+ def v4i32: IOHLVecInst<v4i32, u16imm_i32>;
+
+ def r32: IOHLRegInst<R32C, i32imm>;
+ def f32: IOHLRegInst<R32FP, f32imm>;
+
+ def lo: IOHLRegInst<R32C, symbolLo>;
+}
+
+defm IOHL: ImmOrHalfwordLower;
+
+// Form select mask for bytes using immediate, used in conjunction with the
+// SELB instruction:
+
+class FSMBIVec<ValueType vectype>:
+ RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val),
+ "fsmbi\t$rT, $val",
+ SelectOp,
+ [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>;
+
+multiclass FormSelectMaskBytesImm
+{
+ def v16i8: FSMBIVec<v16i8>;
+ def v8i16: FSMBIVec<v8i16>;
+ def v4i32: FSMBIVec<v4i32>;
+ def v2i64: FSMBIVec<v2i64>;
+}
+
+defm FSMBI : FormSelectMaskBytesImm;
+
+// fsmb: Form select mask for bytes. N.B., the input operand, $rA, is 16 bits wide.
+class FSMBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMBRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMBVecInst<ValueType vectype>:
+ FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskBits {
+ def v16i8_r16: FSMBRegInst<R16C, v16i8>;
+ def v16i8: FSMBVecInst<v16i8>;
+}
+
+defm FSMB: FormSelectMaskBits;
+
+// fsmh: Form select mask for halfwords. N.B., the input operand, $rA, is
+// only 8 bits wide (even though it's input as 16 bits here).
+
+class FSMHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMHRegInst<RegisterClass rclass, ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMHVecInst<ValueType vectype>:
+ FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT),
+ (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskHalfword {
+ def v8i16_r16: FSMHRegInst<R16C, v8i16>;
+ def v8i16: FSMHVecInst<v8i16>;
+}
+
+defm FSMH: FormSelectMaskHalfword;
+
+// fsm: Form select mask for words. Like the other fsm* instructions,
+// only the lower 4 bits of $rA are significant.
+
+class FSMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp,
+ pattern>;
+
+class FSMRegInst<ValueType vectype, RegisterClass rclass>:
+ FSMInst<(outs VECREG:$rT), (ins rclass:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>;
+
+class FSMVecInst<ValueType vectype>:
+ FSMInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>;
+
+multiclass FormSelectMaskWord {
+ def v4i32: FSMVecInst<v4i32>;
+
+ def r32 : FSMRegInst<v4i32, R32C>;
+ def r16 : FSMRegInst<v4i32, R16C>;
+}
+
+defm FSM : FormSelectMaskWord;
+
+// Special case when used for i64 math operations
+multiclass FormSelectMaskWord64 {
+ def r32 : FSMRegInst<v2i64, R32C>;
+ def r16 : FSMRegInst<v2i64, R16C>;
+}
+
+defm FSM64 : FormSelectMaskWord64;
+
+//===----------------------------------------------------------------------===//
+// Integer and Logical Operations:
+//===----------------------------------------------------------------------===//
+
+def AHv8i16:
+ RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>;
+
+def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (AHv8i16 VECREG:$rA, VECREG:$rB)>;
+
+def AHr16:
+ RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "ah\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>;
+
+def AHIvec:
+ RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def AHIr16:
+ RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "ahi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>;
+
+// v4i32, i32 add instruction:
+
+class AInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000011000, OOL, IOL,
+ "a\t$rT, $rA, $rB", IntegerOp,
+ pattern>;
+
+class AVecInst<ValueType vectype>:
+ AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ARegInst<RegisterClass rclass>:
+ AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>;
+
+multiclass AddInstruction {
+ def v4i32: AVecInst<v4i32>;
+ def v16i8: AVecInst<v16i8>;
+ def r32: ARegInst<R32C>;
+}
+
+defm A : AddInstruction;
+
+class AIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00111000, OOL, IOL,
+ "ai\t$rT, $rA, $val", IntegerOp,
+ pattern>;
+
+class AIVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>;
+
+class AIFPVecInst<ValueType vectype, PatLeaf immpred>:
+ AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [/* no pattern */]>;
+
+class AIRegInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>;
+
+// This is used to add epsilons to floating point numbers in the f32 fdiv code:
+class AIFPInst<RegisterClass rclass, PatLeaf immpred>:
+ AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+
+multiclass AddImmediate {
+ def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>;
+
+ def r32: AIRegInst<R32C, i32ImmSExt10>;
+
+ def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>;
+ def f32: AIFPInst<R32FP, i32ImmSExt10>;
+}
+
+defm AI : AddImmediate;
+
+def SFHvec:
+ RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+def SFHr16:
+ RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "sfh\t$rT, $rA, $rB", IntegerOp,
+ [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
+
+def SFHIvec:
+ RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val,
+ (v8i16 VECREG:$rA)))]>;
+
+def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "sfhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>;
+
+def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
+
+
+def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "sf\t$rT, $rA, $rB", IntegerOp,
+ [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
+
+def SFIvec:
+ RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val,
+ (v4i32 VECREG:$rA)))]>;
+
+def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT),
+ (ins R32C:$rA, s10imm_i32:$val),
+ "sfi\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>;
+
+// ADDX: only available in vector form, doesn't match a pattern.
+class ADDXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00000010110, OOL, IOL,
+ "addx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ADDXVecInst<ValueType vectype>:
+ ADDXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class ADDXRegInst<RegisterClass rclass>:
+ ADDXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass AddExtended {
+ def v2i64 : ADDXVecInst<v2i64>;
+ def v4i32 : ADDXVecInst<v4i32>;
+ def r64 : ADDXRegInst<R64C>;
+ def r32 : ADDXRegInst<R32C>;
+}
+
+defm ADDX : AddExtended;
+
+// CG: Generate carry for add
+class CGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000011000, OOL, IOL,
+ "cg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class CGVecInst<ValueType vectype>:
+ CGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class CGRegInst<RegisterClass rclass>:
+ CGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass CarryGenerate {
+ def v2i64 : CGVecInst<v2i64>;
+ def v4i32 : CGVecInst<v4i32>;
+ def r64 : CGRegInst<R64C>;
+ def r32 : CGRegInst<R32C>;
+}
+
+defm CG : CarryGenerate;
+
+// SFX: Subtract from, extended. This is used in conjunction with BG to subtract
+// with carry (borrow, in this case).
+class SFXInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010110, OOL, IOL,
+ "sfx\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class SFXVecInst<ValueType vectype>:
+ SFXInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+class SFXRegInst<RegisterClass rclass>:
+ SFXInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB, rclass:$rCarry),
+ [/* no pattern */]>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
+
+multiclass SubtractExtended {
+ def v2i64 : SFXVecInst<v2i64>;
+ def v4i32 : SFXVecInst<v4i32>;
+ def r64 : SFXRegInst<R64C>;
+ def r32 : SFXRegInst<R32C>;
+}
+
+defm SFX : SubtractExtended;
+
+// BG: borrow generate for subtraction; doesn't match a pattern on its own.
+class BGInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01000010000, OOL, IOL,
+ "bg\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class BGVecInst<ValueType vectype>:
+ BGInst<(outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+class BGRegInst<RegisterClass rclass>:
+ BGInst<(outs rclass:$rT),
+ (ins rclass:$rA, rclass:$rB),
+ [/* no pattern */]>;
+
+multiclass BorrowGenerate {
+ def v4i32 : BGVecInst<v4i32>;
+ def v2i64 : BGVecInst<v2i64>;
+ def r64 : BGRegInst<R64C>;
+ def r32 : BGRegInst<R32C>;
+}
+
+defm BG : BorrowGenerate;
+
+// BGX: Borrow generate, extended.
+def BGXvec:
+ RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB,
+ VECREG:$rCarry),
+ "bgx\t$rT, $rA, $rB", IntegerOp,
+ []>,
+ RegConstraint<"$rCarry = $rT">,
+ NoEncode<"$rCarry">;
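+
+// How the carry/borrow forms compose (sketch; the actual selection patterns
+// live in SPU64InstrInfo.td): a 64-bit add is built from a word-wise CG to
+// produce the per-word carries, a SHUFB to move the low word's carry into the
+// high word's slot, and an ADDX to fold that carry into the word-wise sum.
+// 64-bit subtraction uses BG/SHUFB/SFX in the same way, with a borrow in
+// place of the carry.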
+
+// Halfword multiply variants:
+// N.B.: These can be used to build up larger quantities (16x16 -> 32)
+
+def MPYv8i16:
+ RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYr16:
+ RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ "mpy\t$rT, $rA, $rB", IntegerMulDiv,
+ [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>;
+
+// Unsigned 16-bit multiply:
+
+class MPYUInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00110011110, OOL, IOL,
+ "mpyu\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYUv4i32:
+ MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYUr16:
+ MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>;
+
+def MPYUr32:
+ MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+// mpyi: multiply 16 x s10imm -> 32 result.
+
+class MPYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00101110, OOL, IOL,
+ "mpyi\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYIvec:
+ MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+def MPYIr16:
+ MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>;
+
+// mpyui: same issues as other multiplies, plus, this doesn't match a
+// pattern... but may be used during target DAG selection or lowering
+
+class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10101110, OOL, IOL,
+ "mpyui\t$rT, $rA, $val", IntegerMulDiv,
+ pattern>;
+
+def MPYUIvec:
+ MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ []>;
+
+def MPYUIr16:
+ MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ []>;
+
+// mpya: 16 x 16 + 32 -> 32 bit result (the addend $rC is a full word)
+class MPYAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b0011, OOL, IOL,
+ "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv,
+ pattern>;
+
+def MPYAv4i32:
+ MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4i32 VECREG:$rT),
+ (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))),
+ (v4i32 VECREG:$rC)))]>;
+
+def MPYAr32:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sext:
+ MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)),
+ R32C:$rC))]>;
+
+def MPYAr32_sextinreg:
+ MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC),
+ [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16),
+ (sext_inreg R32C:$rB, i16)),
+ R32C:$rC))]>;
+
+// mpyh: multiply high, used to synthesize 32-bit multiplies
+class MPYHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10100011110, OOL, IOL,
+ "mpyh\t$rT, $rA, $rB", IntegerMulDiv,
+ pattern>;
+
+def MPYHv4i32:
+ MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern */]>;
+
+def MPYHr32:
+ MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* no pattern */]>;
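+
+// A full 32-bit multiply is synthesized from these halves (see SPUMathInstr.td
+// for the actual pattern); roughly:
+//
+//   mul32(a, b) ~ mpyh(a, b) + mpyh(b, a) + mpyu(a, b)
+//
+// i.e. the two cross products of one operand's high halfword with the other's
+// low halfword (each shifted into the upper 16 bits by mpyh), plus the
+// unsigned product of the two low halfwords.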
+
+// mpys: multiply high and shift right (returns the top half of
+// a 16-bit multiply, sign extended to 32 bits.)
+
+class MPYSInst<dag OOL, dag IOL>:
+ RRForm<0b11100011110, OOL, IOL,
+ "mpys\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYSv4i32:
+ MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYSr16:
+ MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>;
+
+// mpyhh: multiply high-high (returns the 32-bit result from multiplying
+// the top 16 bits of $rA and $rB)
+
+class MPYHHInst<dag OOL, dag IOL>:
+ RRForm<0b01100011110, OOL, IOL,
+ "mpyhh\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHv8i16:
+ MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHr32:
+ MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhha: Multiply high-high, add to $rT:
+
+class MPYHHAInst<dag OOL, dag IOL>:
+ RRForm<0b01100010110, OOL, IOL,
+ "mpyhha\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAvec:
+ MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAr32:
+ MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhu: Multiply high-high, unsigned, e.g.:
+//
+// +-------+-------+ +-------+-------+ +---------+
+// | a0 . a1 | x | b0 . b1 | = | a0 x b0 |
+// +-------+-------+ +-------+-------+ +---------+
+//
+// where a0, b0 are the upper 16 bits of the 32-bit word
+
+class MPYHHUInst<dag OOL, dag IOL>:
+ RRForm<0b01110011110, OOL, IOL,
+ "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHUv4i32:
+ MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHUr32:
+ MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+// mpyhhau: Multiply high-high, unsigned, add to $rT:
+
+class MPYHHAUInst<dag OOL, dag IOL>:
+ RRForm<0b01110010110, OOL, IOL,
+ "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv,
+ [/* no pattern */]>;
+
+def MPYHHAUvec:
+ MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>;
+
+def MPYHHAUr32:
+ MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// clz: Count leading zeroes
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class CLZInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA",
+ IntegerOp, pattern>;
+
+class CLZRegInst<RegisterClass rclass>:
+ CLZInst<(outs rclass:$rT), (ins rclass:$rA),
+ [(set rclass:$rT, (ctlz rclass:$rA))]>;
+
+class CLZVecInst<ValueType vectype>:
+ CLZInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>;
+
+multiclass CountLeadingZeroes {
+ def v4i32 : CLZVecInst<v4i32>;
+ def r32 : CLZRegInst<R32C>;
+}
+
+defm CLZ : CountLeadingZeroes;
+
+// cntb: Count ones in bytes (aka "population count")
+//
+// NOTE: This instruction is really a vector instruction, but the custom
+// lowering code uses it in unorthodox ways to support CTPOP for other
+// data types!
+
+def CNTBv16i8:
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>;
+
+def CNTBv8i16 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>;
+
+def CNTBv4i32 :
+ RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cntb\t$rT, $rA", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>;
+
+// gbb: Gather the low order bits from each byte in $rA into a single 16-bit
+// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are
+// slots 1-3.
+//
+// Note: This instruction "pairs" with the fsmb instruction for all of the
+// various types defined here.
+//
+// Note 2: The "VecInst" and "RegInst" forms refer to the result being either
+// a vector or register.
+
+class GBBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>;
+
+class GBBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBBVecInst<ValueType vectype>:
+ GBBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsFromBytes {
+ def v16i8_r32: GBBRegInst<R32C, v16i8>;
+ def v16i8_r16: GBBRegInst<R16C, v16i8>;
+ def v16i8: GBBVecInst<v16i8>;
+}
+
+defm GBB: GatherBitsFromBytes;
+
+// gbh: Gather all low order bits from each halfword in $rA into a single
+// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0
+// and slots 1-3 also set to 0.
+//
+// See notes for GBBInst, above.
+
+class GBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBHRegInst<RegisterClass rclass, ValueType vectype>:
+ GBHInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBHVecInst<ValueType vectype>:
+ GBHInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsHalfword {
+ def v8i16_r32: GBHRegInst<R32C, v8i16>;
+ def v8i16_r16: GBHRegInst<R16C, v8i16>;
+ def v8i16: GBHVecInst<v8i16>;
+}
+
+defm GBH: GatherBitsHalfword;
+
+// gb: Gather all low order bits from each word in $rA into a single
+// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0,
+// as well as slots 1-3.
+//
+// See notes for gbb, above.
+
+class GBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp,
+ pattern>;
+
+class GBRegInst<RegisterClass rclass, ValueType vectype>:
+ GBInst<(outs rclass:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+class GBVecInst<ValueType vectype>:
+ GBInst<(outs VECREG:$rT), (ins VECREG:$rA),
+ [/* no pattern */]>;
+
+multiclass GatherBitsWord {
+ def v4i32_r32: GBRegInst<R32C, v4i32>;
+ def v4i32_r16: GBRegInst<R16C, v4i32>;
+ def v4i32: GBVecInst<v4i32>;
+}
+
+defm GB: GatherBitsWord;
+
+// avgb: average bytes
+def AVGB:
+ RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "avgb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// absdb: absolute difference of bytes
+def ABSDB:
+ RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "absdb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// sumb: sum bytes into halfwords
+def SUMB:
+ RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "sumb\t$rT, $rA, $rB", ByteOp,
+ []>;
+
+// Sign extension operations:
+class XSBHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL,
+ "xsbh\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
+ pattern>;
+
+multiclass ExtendByteHalfword {
+ def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+  [/*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def r16: XSBHInRegInst<R16C,
+ [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
+
+  // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
+  // quantities within a 32-bit register (see the sext 8->32 pattern below).
+  // Intentionally doesn't match a pattern on its own, because we want the
+  // sext 8->32 pattern to do the work for us, namely by adding the extra
+  // XSHWr32.
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
+
+ // Same as the 32-bit version, but for i64
+ def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSBH : ExtendByteHalfword;
+
+// Sign extend halfwords to words:
+
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+ IntegerOp, pattern>;
+
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDest),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+ pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+ XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+ [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+ def v4i32: XSHWVecInst<v8i16, v4i32>;
+
+ def r16: XSHWRegInst<R32C>;
+
+ def r32: XSHWInRegInst<R32C,
+ [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+ def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
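+
+// Putting XSBH and XSHW together (sketch): a sext from i8 to i32 is expected
+// to select as the in-register byte->halfword extend followed by the
+// halfword->word extend, i.e. roughly XSHWr32(XSBHr32(x)); this is why
+// XSBHr32 above deliberately carries no pattern of its own.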
+
+// Sign-extend words to doublewords (32->64 bits)
+
+class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+ IntegerOp, pattern>;
+
+class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
+ [/*(set (out_vectype VECREG:$rDst),
+ (sext (out_vectype VECREG:$rSrc)))*/]>;
+
+class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>:
+ XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc),
+ [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>;
+
+multiclass ExtendWordToDoubleWord {
+ def v2i64: XSWDVecInst<v4i32, v2i64>;
+ def r64: XSWDRegInst<R32C, R64C>;
+
+ def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc),
+ [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>;
+}
+
+defm XSWD : ExtendWordToDoubleWord;
+
+// AND operations
+
+class ANDInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDVecInst<ValueType vectype>:
+ ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ANDRegInst<RegisterClass rclass>:
+ ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseAnd
+{
+ def v16i8: ANDVecInst<v16i8>;
+ def v8i16: ANDVecInst<v8i16>;
+ def v4i32: ANDVecInst<v4i32>;
+ def v2i64: ANDVecInst<v2i64>;
+
+ def r128: ANDRegInst<GPRC>;
+ def r64: ANDRegInst<R64C>;
+ def r32: ANDRegInst<R32C>;
+ def r16: ANDRegInst<R16C>;
+ def r8: ANDRegInst<R8C>;
+
+ //===---------------------------------------------
+ // Special instructions to perform the fabs instruction
+ def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* Intentionally does not match a pattern */]>;
+
+ //===---------------------------------------------
+
+ // Hacked form of AND to zero-extend 16-bit quantities to 32-bit
+ // quantities -- see 16->32 zext pattern.
+ //
+  // This pattern is somewhat artificial: it could match compiler-generated
+  // code, but is unlikely to do so in practice.
+
+ def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>;
+}
+
+defm AND : BitwiseAnd;
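+
+// Note on the fabs32/fabs64/fabsvec forms above: fabs is implemented by ANDing
+// the float's bit pattern with a mask whose sign bit is clear (e.g. 0x7fffffff
+// for f32); the fabs lowering is expected to materialize that mask into $rB,
+// which is why these forms carry no pattern here.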
+
+
+def vnot_cell_conv : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
+
+// N.B.: vnot_cell_conv is one of those special target selection pattern
+// fragments in which we expect there to be a bit_convert on the constant.
+// Bear in mind that llvm translates "not <reg>" to "xor <reg>, -1" (or, in
+// this case, a constant -1 vector).
+
+class ANDCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (and (vectype VECREG:$rA),
+ (vnot_frag (vectype VECREG:$rB))))]>;
+
+class ANDCRegInst<RegisterClass rclass>:
+ ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass AndComplement
+{
+ def v16i8: ANDCVecInst<v16i8>;
+ def v8i16: ANDCVecInst<v8i16>;
+ def v4i32: ANDCVecInst<v4i32>;
+ def v2i64: ANDCVecInst<v2i64>;
+
+ def r128: ANDCRegInst<GPRC>;
+ def r64: ANDCRegInst<R64C>;
+ def r32: ANDCRegInst<R32C>;
+ def r16: ANDCRegInst<R16C>;
+ def r8: ANDCRegInst<R8C>;
+
+ // Sometimes, the xor pattern has a bitcast constant:
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
+}
+
+defm ANDC : AndComplement;
+
+class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndByteImm
+{
+ def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT),
+ (and (v16i8 VECREG:$rA),
+ (v16i8 v16i8U8Imm:$val)))]>;
+
+ def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>;
+}
+
+defm ANDBI : AndByteImm;
+
+class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass AndHalfwordImm
+{
+ def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>;
+
+ def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Zero-extend i8 to i16:
+ def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val),
+ [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>;
+}
+
+defm ANDHI : AndHalfwordImm;
+
+class ANDIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass AndWordImm
+{
+ def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>;
+
+ def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32
+ // pattern below.
+ def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R8C:$rA), i32ImmSExt10:$val))]>;
+
+ // Hacked form of ANDI to zero-extend i16 quantities to i32. See the
+ // zext 16->32 pattern below.
+ //
+ // Note that this pattern is somewhat artificial, since it might match
+ // something the compiler generates but is unlikely to occur in practice.
+ def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT,
+ (and (zext R16C:$rA), i32ImmSExt10:$val))]>;
+}
+
+defm ANDI : AndWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Bitwise OR group:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Bitwise "or" (N.B.: These are also register-register copy instructions...)
+class ORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORVecInst<ValueType vectype>:
+ ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class ORRegInst<RegisterClass rclass>:
+ ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>;
+
+
+multiclass BitwiseOr
+{
+ def v16i8: ORVecInst<v16i8>;
+ def v8i16: ORVecInst<v8i16>;
+ def v4i32: ORVecInst<v4i32>;
+ def v2i64: ORVecInst<v2i64>;
+
+ def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4f32 VECREG:$rT),
+ (v4f32 (bitconvert (or (v4i32 VECREG:$rA),
+ (v4i32 VECREG:$rB)))))]>;
+
+ def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v2f64 VECREG:$rT),
+ (v2f64 (bitconvert (or (v2i64 VECREG:$rA),
+ (v2i64 VECREG:$rB)))))]>;
+
+ def r128: ORRegInst<GPRC>;
+ def r64: ORRegInst<R64C>;
+ def r32: ORRegInst<R32C>;
+ def r16: ORRegInst<R16C>;
+ def r8: ORRegInst<R8C>;
+
+ // OR instructions used to copy f32 and f64 registers.
+ def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [/* no pattern */]>;
+
+ def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ [/* no pattern */]>;
+}
+
+defm OR : BitwiseOr;
+
+//===----------------------------------------------------------------------===//
+// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)),
+ (COPY_TO_REGCLASS R8C:$rA, VECREG)>;
+
+def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)),
+ (COPY_TO_REGCLASS R16C:$rA, VECREG)>;
+
+def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, VECREG)>;
+
+def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, VECREG)>;
+
+def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, VECREG)>;
+
+def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, VECREG)>;
+
+def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>;
+
+def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>;
+
+def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>;
+
+def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>;
+
+def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>;
+
+def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))),
+ (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>;
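+
+// Background for the prefslot patterns above: every SPU register is 128 bits
+// wide, and scalars live in the register's "preferred slot" (within word
+// element 0). Reinterpreting a scalar as a vector, or extracting the preferred
+// slot of a vector, therefore needs no data movement -- only a
+// COPY_TO_REGCLASS to retag the register.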
+
+// Load Register: This is an assembler alias for a bitwise OR of a register
+// against itself. It's here because it brings some clarity to assembly
+// language output.
+
+let hasCtrlDep = 1 in {
+ class LRInst<dag OOL, dag IOL>
+ : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> {
+ bits<7> RA;
+ bits<7> RT;
+
+ let Pattern = [/*no pattern*/];
+
+ let Inst{0-10} = 0b10000010000; /* It's an OR operation */
+ let Inst{11-17} = RA;
+ let Inst{18-24} = RA;
+ let Inst{25-31} = RT;
+ }
+
+ class LRVecInst<ValueType vectype>:
+ LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+ class LRRegInst<RegisterClass rclass>:
+ LRInst<(outs rclass:$rT), (ins rclass:$rA)>;
+
+ multiclass LoadRegister {
+ def v2i64: LRVecInst<v2i64>;
+ def v2f64: LRVecInst<v2f64>;
+ def v4i32: LRVecInst<v4i32>;
+ def v4f32: LRVecInst<v4f32>;
+ def v8i16: LRVecInst<v8i16>;
+ def v16i8: LRVecInst<v16i8>;
+
+ def r128: LRRegInst<GPRC>;
+ def r64: LRRegInst<R64C>;
+ def f64: LRRegInst<R64FP>;
+ def r32: LRRegInst<R32C>;
+ def f32: LRRegInst<R32FP>;
+ def r16: LRRegInst<R16C>;
+ def r8: LRRegInst<R8C>;
+ }
+
+ defm LR: LoadRegister;
+}
+
+// ORC: Bitwise "or" with complement (c = a | ~b)
+
+class ORCInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class ORCVecInst<ValueType vectype>:
+ ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ (vnot (vectype VECREG:$rB))))]>;
+
+class ORCRegInst<RegisterClass rclass>:
+ ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>;
+
+multiclass BitwiseOrComplement
+{
+ def v16i8: ORCVecInst<v16i8>;
+ def v8i16: ORCVecInst<v8i16>;
+ def v4i32: ORCVecInst<v4i32>;
+ def v2i64: ORCVecInst<v2i64>;
+
+ def r128: ORCRegInst<GPRC>;
+ def r64: ORCRegInst<R64C>;
+ def r32: ORCRegInst<R32C>;
+ def r16: ORCRegInst<R16C>;
+ def r8: ORCRegInst<R8C>;
+}
+
+defm ORC : BitwiseOrComplement;
+
+// OR byte immediate
+class ORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORBIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA),
+ (vectype immpred:$val)))]>;
+
+multiclass BitwiseOrByteImm
+{
+ def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>;
+
+ def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>;
+}
+
+defm ORBI : BitwiseOrByteImm;
+
+// OR halfword immediate
+class ORHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORHIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+multiclass BitwiseOrHalfwordImm
+{
+ def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>;
+
+ def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val),
+ [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>;
+
+ // Specialized ORHI form used to promote 8-bit registers to 16-bit
+ def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val),
+ [(set R16C:$rT, (or (anyext R8C:$rA),
+ i16ImmSExt10:$val))]>;
+}
+
+defm ORHI : BitwiseOrHalfwordImm;
+
+class ORIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+class ORIVecInst<ValueType vectype, PatLeaf immpred>:
+ ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA),
+ immpred:$val))]>;
+
+// Bitwise "or" with immediate
+multiclass BitwiseOrImm
+{
+ def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>;
+
+ def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>;
+
+  // i16i32: Hacked version of the ORI instruction to extend 16-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+  // infra "anyext 16->32" pattern.)
+ def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R16C:$rA),
+ i32ImmSExt10:$val))]>;
+
+  // i8i32: Hacked version of the ORI instruction to extend 8-bit quantities
+  // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide
+  // infra "anyext 8->32" pattern.)
+ def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (or (anyext R8C:$rA),
+ i32ImmSExt10:$val))]>;
+}
+
+defm ORI : BitwiseOrImm;
+
+// ORX: "or" across the vector: ORs $rA's word slots together, leaving the
+// result in $rT[0]; slots 1-3 are zeroed.
+//
+// FIXME: Needs to match an intrinsic pattern.
+def ORXv4i32:
+ RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "orx\t$rT, $rA, $rB", IntegerOp,
+ []>;
+
+// XOR:
+
+class XORInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class XORVecInst<ValueType vectype>:
+ XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA),
+ (vectype VECREG:$rB)))]>;
+
+class XORRegInst<RegisterClass rclass>:
+ XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>;
+
+multiclass BitwiseExclusiveOr
+{
+ def v16i8: XORVecInst<v16i8>;
+ def v8i16: XORVecInst<v8i16>;
+ def v4i32: XORVecInst<v4i32>;
+ def v2i64: XORVecInst<v2i64>;
+
+ def r128: XORRegInst<GPRC>;
+ def r64: XORRegInst<R64C>;
+ def r32: XORRegInst<R32C>;
+ def r16: XORRegInst<R16C>;
+ def r8: XORRegInst<R8C>;
+
+ // XOR instructions used to negate f32 and f64 quantities.
+
+ def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+ def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB),
+ [/* no pattern */]>;
+
+ def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* no pattern, see fneg{32,64} */]>;
+}
+
+defm XOR : BitwiseExclusiveOr;
+
+//==----------------------------------------------------------
+
+class XORBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val",
+ IntegerOp, pattern>;
+
+multiclass XorByteImm
+{
+ def v16i8:
+ XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>;
+
+ def r8:
+ XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val),
+ [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>;
+}
+
+defm XORBI : XorByteImm;
+
+def XORHIv8i16:
+ RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA),
+ v8i16SExt10Imm:$val))]>;
+
+def XORHIr16:
+ RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ "xorhi\t$rT, $rA, $val", IntegerOp,
+ [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>;
+
+def XORIv4i32:
+ RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA),
+ v4i32SExt10Imm:$val))]>;
+
+def XORIr32:
+ RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ "xori\t$rT, $rA, $val", IntegerOp,
+ [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>;
+
+// NAND:
+
+class NANDInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NANDVecInst<ValueType vectype>:
+ NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NANDRegInst<RegisterClass rclass>:
+ NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNand
+{
+ def v16i8: NANDVecInst<v16i8>;
+ def v8i16: NANDVecInst<v8i16>;
+ def v4i32: NANDVecInst<v4i32>;
+ def v2i64: NANDVecInst<v2i64>;
+
+ def r128: NANDRegInst<GPRC>;
+ def r64: NANDRegInst<R64C>;
+ def r32: NANDRegInst<R32C>;
+ def r16: NANDRegInst<R16C>;
+ def r8: NANDRegInst<R8C>;
+}
+
+defm NAND : BitwiseNand;
+
+// NOR:
+
+class NORInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class NORVecInst<ValueType vectype>:
+ NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA),
+ (vectype VECREG:$rB))))]>;
+class NORRegInst<RegisterClass rclass>:
+ NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitwiseNor
+{
+ def v16i8: NORVecInst<v16i8>;
+ def v8i16: NORVecInst<v8i16>;
+ def v4i32: NORVecInst<v4i32>;
+ def v2i64: NORVecInst<v2i64>;
+
+ def r128: NORRegInst<GPRC>;
+ def r64: NORRegInst<R64C>;
+ def r32: NORRegInst<R32C>;
+ def r16: NORRegInst<R16C>;
+ def r8: NORRegInst<R8C>;
+}
+
+defm NOR : BitwiseNor;
+
+// Select bits:
+class SELBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC",
+ IntegerOp, pattern>;
+
+class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)),
+ (and (vnot_frag (vectype VECREG:$rC)),
+ (vectype VECREG:$rA))))]>;
+
+class SELBVecVCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (vectype VECREG:$rT),
+ (select (vectype VECREG:$rC),
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBVecCondInst<ValueType vectype>:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC),
+ [(set (vectype VECREG:$rT),
+ (select R32C:$rC,
+ (vectype VECREG:$rB),
+ (vectype VECREG:$rA)))]>;
+
+class SELBRegInst<RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC),
+ [(set rclass:$rT,
+ (or (and rclass:$rB, rclass:$rC),
+ (and rclass:$rA, (not rclass:$rC))))]>;
+
+class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>:
+ SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC),
+ [(set rclass:$rT,
+ (select rcond:$rC, rclass:$rB, rclass:$rA))]>;
+
+multiclass SelectBits
+{
+ def v16i8: SELBVecInst<v16i8>;
+ def v8i16: SELBVecInst<v8i16>;
+ def v4i32: SELBVecInst<v4i32>;
+ def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
+
+ def r128: SELBRegInst<GPRC>;
+ def r64: SELBRegInst<R64C>;
+ def r32: SELBRegInst<R32C>;
+ def r16: SELBRegInst<R16C>;
+ def r8: SELBRegInst<R8C>;
+
+ def v16i8_cond: SELBVecCondInst<v16i8>;
+ def v8i16_cond: SELBVecCondInst<v8i16>;
+ def v4i32_cond: SELBVecCondInst<v4i32>;
+ def v2i64_cond: SELBVecCondInst<v2i64>;
+
+  def v16i8_vcond: SELBVecVCondInst<v16i8>;
+  def v8i16_vcond: SELBVecVCondInst<v8i16>;
+  def v4i32_vcond: SELBVecVCondInst<v4i32>;
+  def v2i64_vcond: SELBVecVCondInst<v2i64>;
+
+ def v4f32_cond:
+ SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (v4f32 VECREG:$rT),
+ (select (v4i32 VECREG:$rC),
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)))]>;
+
+ // SELBr64_cond is defined in SPU64InstrInfo.td
+ def r32_cond: SELBRegCondInst<R32C, R32C>;
+ def f32_cond: SELBRegCondInst<R32C, R32FP>;
+ def r16_cond: SELBRegCondInst<R16C, R16C>;
+ def r8_cond: SELBRegCondInst<R8C, R8C>;
+}
+
+defm SELB : SelectBits;
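+
+// Usage note: selb is a pure bitwise select, rT = (rC & rB) | (~rC & rA). The
+// plain forms above match that expression directly; the *_cond/*_vcond forms
+// additionally match (select cond, b, a), on the assumption that the condition
+// register holds an all-ones/all-zeros mask of the kind produced by the
+// compare instructions.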
+
+class SPUselbPatVec<ValueType vectype, SPUInstr inst>:
+ Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)),
+ (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>;
+
+def : SPUselbPatVec<v16i8, SELBv16i8>;
+def : SPUselbPatVec<v8i16, SELBv8i16>;
+def : SPUselbPatVec<v4i32, SELBv4i32>;
+def : SPUselbPatVec<v2i64, SELBv2i64>;
+
+class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>:
+ Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC),
+ (inst rclass:$rA, rclass:$rB, rclass:$rC)>;
+
+def : SPUselbPatReg<R8C, SELBr8>;
+def : SPUselbPatReg<R16C, SELBr16>;
+def : SPUselbPatReg<R32C, SELBr32>;
+def : SPUselbPatReg<R64C, SELBr64>;
+
+// EQV: Equivalence (1 for each same bit, otherwise 0)
+//
+// Note: There are a lot of ways to match this bit operator and these patterns
+// attempt to be as exhaustive as possible.
+
+class EQVInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB",
+ IntegerOp, pattern>;
+
+class EQVVecInst<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (and (vnot (vectype VECREG:$rA)),
+ (vnot (vectype VECREG:$rB)))))]>;
+
+class EQVRegInst<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB),
+ (and (not rclass:$rA), (not rclass:$rB))))]>;
+
+class EQVVecPattern1<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern1<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>;
+
+class EQVVecPattern2<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)),
+ (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>;
+
+class EQVRegPattern2<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT,
+ (or (and rclass:$rA, rclass:$rB),
+ (not (or rclass:$rA, rclass:$rB))))]>;
+
+class EQVVecPattern3<ValueType vectype>:
+ EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>;
+
+class EQVRegPattern3<RegisterClass rclass>:
+ EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>;
+
+multiclass BitEquivalence
+{
+ def v16i8: EQVVecInst<v16i8>;
+ def v8i16: EQVVecInst<v8i16>;
+ def v4i32: EQVVecInst<v4i32>;
+ def v2i64: EQVVecInst<v2i64>;
+
+ def v16i8_1: EQVVecPattern1<v16i8>;
+ def v8i16_1: EQVVecPattern1<v8i16>;
+ def v4i32_1: EQVVecPattern1<v4i32>;
+ def v2i64_1: EQVVecPattern1<v2i64>;
+
+ def v16i8_2: EQVVecPattern2<v16i8>;
+ def v8i16_2: EQVVecPattern2<v8i16>;
+ def v4i32_2: EQVVecPattern2<v4i32>;
+ def v2i64_2: EQVVecPattern2<v2i64>;
+
+ def v16i8_3: EQVVecPattern3<v16i8>;
+ def v8i16_3: EQVVecPattern3<v8i16>;
+ def v4i32_3: EQVVecPattern3<v4i32>;
+ def v2i64_3: EQVVecPattern3<v2i64>;
+
+ def r128: EQVRegInst<GPRC>;
+ def r64: EQVRegInst<R64C>;
+ def r32: EQVRegInst<R32C>;
+ def r16: EQVRegInst<R16C>;
+ def r8: EQVRegInst<R8C>;
+
+ def r128_1: EQVRegPattern1<GPRC>;
+ def r64_1: EQVRegPattern1<R64C>;
+ def r32_1: EQVRegPattern1<R32C>;
+ def r16_1: EQVRegPattern1<R16C>;
+ def r8_1: EQVRegPattern1<R8C>;
+
+ def r128_2: EQVRegPattern2<GPRC>;
+ def r64_2: EQVRegPattern2<R64C>;
+ def r32_2: EQVRegPattern2<R32C>;
+ def r16_2: EQVRegPattern2<R16C>;
+ def r8_2: EQVRegPattern2<R8C>;
+
+ def r128_3: EQVRegPattern3<GPRC>;
+ def r64_3: EQVRegPattern3<R64C>;
+ def r32_3: EQVRegPattern3<R32C>;
+ def r16_3: EQVRegPattern3<R16C>;
+ def r8_3: EQVRegPattern3<R8C>;
+}
+
+defm EQV: BitEquivalence;
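+
+// In short, every variant above encodes the same operation, rT = ~(rA ^ rB)
+// (bitwise xnor); the separate pattern classes exist only because the
+// equivalence can reach instruction selection in several algebraic shapes.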
+
+//===----------------------------------------------------------------------===//
+// Vector shuffle...
+//===----------------------------------------------------------------------===//
+// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB.
+// See the SPUshuffle SDNode operand above, which sets up the DAG pattern
+// matcher to emit something when LowerVECTOR_SHUFFLE generates a node with
+// the SPUISD::SHUFB opcode.
+//===----------------------------------------------------------------------===//
+
+class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC",
+ ShuffleOp, pattern>;
+
+class SHUFBVecInst<ValueType resultvec, ValueType maskvec>:
+ SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ [(set (resultvec VECREG:$rT),
+ (SPUshuffle (resultvec VECREG:$rA),
+ (resultvec VECREG:$rB),
+ (maskvec VECREG:$rC)))]>;
+
+class SHUFBGPRCInst:
+ SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC),
+ [/* no pattern */]>;
+
+multiclass ShuffleBytes
+{
+ def v16i8 : SHUFBVecInst<v16i8, v16i8>;
+ def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>;
+ def v8i16 : SHUFBVecInst<v8i16, v16i8>;
+ def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>;
+ def v4i32 : SHUFBVecInst<v4i32, v16i8>;
+ def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>;
+ def v2i64 : SHUFBVecInst<v2i64, v16i8>;
+ def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>;
+
+ def v4f32 : SHUFBVecInst<v4f32, v16i8>;
+ def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>;
+
+ def v2f64 : SHUFBVecInst<v2f64, v16i8>;
+ def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>;
+
+ def gprc : SHUFBGPRCInst;
+}
+
+defm SHUFB : ShuffleBytes;
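+
+// Control-word reminder (per the SPU ISA): each byte of the $rC mask selects
+// one byte of the 32-byte concatenation $rA:$rB through its low five bits,
+// while control bytes of the form 10xxxxxx, 110xxxxx and 111xxxxx produce the
+// constants 0x00, 0xFF and 0x80 respectively. LowerVECTOR_SHUFFLE is
+// responsible for building these masks.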
+
+//===----------------------------------------------------------------------===//
+// Shift and rotate group:
+//===----------------------------------------------------------------------===//
+
+class SHLHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class SHLHVecInst<ValueType vectype>:
+ SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass ShiftLeftHalfword
+{
+ def v8i16: SHLHVecInst<v8i16>;
+ def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>;
+ def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>;
+}
+
+defm SHLH : ShiftLeftHalfword;
+
+//===----------------------------------------------------------------------===//
+
+class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class SHLHIVecInst<ValueType vectype>:
+ SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>;
+
+multiclass ShiftLeftHalfwordImm
+{
+ def v8i16: SHLHIVecInst<v8i16>;
+ def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>;
+}
+
+defm SHLHI : ShiftLeftHalfwordImm;
+
+def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>;
+
+def : Pat<(shl R16C:$rA, (i32 uimm7:$val)),
+ (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>;
+
+//===----------------------------------------------------------------------===//
+
+class SHLInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWord
+{
+ def v4i32:
+ SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ def r32:
+ SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>;
+}
+
+defm SHL: ShiftLeftWord;
+
+//===----------------------------------------------------------------------===//
+
+class SHLIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+multiclass ShiftLeftWordImm
+{
+ def v4i32:
+ SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+ def r32:
+ SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val),
+ [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLI : ShiftLeftWordImm;
+
+//===----------------------------------------------------------------------===//
+// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit
+// register) to the left. Vector form is here to ensure type correctness.
+//
+// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift
+// of at most 7 bits is actually possible.
+//
+// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI
+// to shift i64 and i128. SHLQBI is the residual left over after shifting by
+// bytes with SHLQBY.
+
+class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBIVecInst<ValueType vectype>:
+ SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>;
+
+class SHLQBIRegInst<RegisterClass rclass>:
+ SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadByBits
+{
+ def v16i8: SHLQBIVecInst<v16i8>;
+ def v8i16: SHLQBIVecInst<v8i16>;
+ def v4i32: SHLQBIVecInst<v4i32>;
+ def v4f32: SHLQBIVecInst<v4f32>;
+ def v2i64: SHLQBIVecInst<v2i64>;
+ def v2f64: SHLQBIVecInst<v2f64>;
+
+ def r128: SHLQBIRegInst<GPRC>;
+}
+
+defm SHLQBI : ShiftLeftQuadByBits;
+
+// See note above on SHLQBI. In this case, the predicate actually does the
+// enforcement, whereas with SHLQBI, we have to "take it on faith."
+class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBIIVecInst<ValueType vectype>:
+ SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>;
+
+multiclass ShiftLeftQuadByBitsImm
+{
+ def v16i8 : SHLQBIIVecInst<v16i8>;
+ def v8i16 : SHLQBIIVecInst<v8i16>;
+ def v4i32 : SHLQBIIVecInst<v4i32>;
+ def v4f32 : SHLQBIIVecInst<v4f32>;
+ def v2i64 : SHLQBIIVecInst<v2i64>;
+ def v2f64 : SHLQBIIVecInst<v2f64>;
+}
+
+defm SHLQBII : ShiftLeftQuadByBitsImm;
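+
+// Putting the quad shifts together (sketch): a 128-bit left shift by an
+// immediate n can be expressed as a byte shift plus the residual bit shift,
+//
+//   shlqbyi  $rT, $rA, (n / 8)
+//   shlqbii  $rT, $rT, (n % 8)
+//
+// while a shift count held in a register uses SHLQBYBI (bytes derived from a
+// bit count) together with SHLQBI for the remaining 0-7 bits.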
+
+// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes,
+// not by bits. See notes above on SHLQBI.
+
+class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYVecInst<ValueType vectype>:
+ SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>;
+
+multiclass ShiftLeftQuadBytes
+{
+ def v16i8: SHLQBYVecInst<v16i8>;
+ def v8i16: SHLQBYVecInst<v8i16>;
+ def v4i32: SHLQBYVecInst<v4i32>;
+ def v4f32: SHLQBYVecInst<v4f32>;
+ def v2i64: SHLQBYVecInst<v2i64>;
+ def v2f64: SHLQBYVecInst<v2f64>;
+ def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>;
+}
+
+defm SHLQBY: ShiftLeftQuadBytes;
+
+class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class SHLQBYIVecInst<ValueType vectype>:
+ SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>;
+
+multiclass ShiftLeftQuadBytesImm
+{
+ def v16i8: SHLQBYIVecInst<v16i8>;
+ def v8i16: SHLQBYIVecInst<v8i16>;
+ def v4i32: SHLQBYIVecInst<v4i32>;
+ def v4f32: SHLQBYIVecInst<v4f32>;
+ def v2i64: SHLQBYIVecInst<v2i64>;
+ def v2f64: SHLQBYIVecInst<v2f64>;
+ def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val),
+ [(set GPRC:$rT,
+ (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm SHLQBYI : ShiftLeftQuadBytesImm;
+
+class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class SHLQBYBIVecInst<ValueType vectype>:
+ SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class SHLQBYBIRegInst<RegisterClass rclass>:
+ SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass ShiftLeftQuadBytesBitCount
+{
+ def v16i8: SHLQBYBIVecInst<v16i8>;
+ def v8i16: SHLQBYBIVecInst<v8i16>;
+ def v4i32: SHLQBYBIVecInst<v4i32>;
+ def v4f32: SHLQBYBIVecInst<v4f32>;
+ def v2i64: SHLQBYBIVecInst<v2i64>;
+ def v2f64: SHLQBYBIVecInst<v2f64>;
+
+ def r128: SHLQBYBIRegInst<GPRC>;
+}
+
+defm SHLQBYBI : ShiftLeftQuadBytesBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTHVecInst<ValueType vectype>:
+ ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>;
+
+class ROTHRegInst<RegisterClass rclass>:
+ ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB),
+ [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>;
+
+multiclass RotateLeftHalfword
+{
+ def v8i16: ROTHVecInst<v8i16>;
+ def r16: ROTHRegInst<R16C>;
+}
+
+defm ROTH: RotateLeftHalfword;
+
+def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate halfword, immediate:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTHIVecInst<ValueType vectype>:
+ ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>;
+
+multiclass RotateLeftHalfwordImm
+{
+ def v8i16: ROTHIVecInst<v8i16>;
+ def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>;
+ def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val),
+ [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>;
+}
+
+defm ROTHI: RotateLeftHalfwordImm;
+
+def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)),
+ (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+class ROTVecInst<ValueType vectype>:
+ ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>;
+
+class ROTRegInst<RegisterClass rclass>:
+ ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [(set rclass:$rT,
+ (rotl rclass:$rA, R32C:$rB))]>;
+
+multiclass RotateLeftWord
+{
+ def v4i32: ROTVecInst<v4i32>;
+ def r32: ROTRegInst<R32C>;
+}
+
+defm ROT: RotateLeftWord;
+
+// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or
+// 32-bit register
+def ROTr32_r16_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))),
+ (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>;
+
+def ROTr32_r8_anyext:
+ ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB),
+ [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>;
+
+def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))),
+ (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate word, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>;
+
+class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
+ ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>;
+
+multiclass RotateLeftWordImm
+{
+ def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>;
+ def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>;
+
+ def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>;
+ def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>;
+ def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>;
+}
+
+defm ROTI : RotateLeftWordImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count)
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBYGenInst<ValueType type, RegisterClass rc>:
+ ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB),
+ [(set (type rc:$rT),
+ (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>;
+
+class ROTQBYVecInst<ValueType type>:
+ ROTQBYGenInst<type, VECREG>;
+
+multiclass RotateQuadLeftByBytes
+{
+ def v16i8: ROTQBYVecInst<v16i8>;
+ def v8i16: ROTQBYVecInst<v8i16>;
+ def v4i32: ROTQBYVecInst<v4i32>;
+ def v4f32: ROTQBYVecInst<v4f32>;
+ def v2i64: ROTQBYVecInst<v2i64>;
+ def v2f64: ROTQBYVecInst<v2f64>;
+ def i128: ROTQBYGenInst<i128, GPRC>;
+}
+
+defm ROTQBY: RotateQuadLeftByBytes;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad by byte (count), immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBYIGenInst<ValueType type, RegisterClass rclass>:
+ ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val),
+ [(set (type rclass:$rT),
+ (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>;
+
+class ROTQBYIVecInst<ValueType vectype>:
+ ROTQBYIGenInst<vectype, VECREG>;
+
+multiclass RotateQuadByBytesImm
+{
+ def v16i8: ROTQBYIVecInst<v16i8>;
+ def v8i16: ROTQBYIVecInst<v8i16>;
+ def v4i32: ROTQBYIVecInst<v4i32>;
+ def v4f32: ROTQBYIVecInst<v4f32>;
+ def v2i64: ROTQBYIVecInst<v2i64>;
+ def vfi64: ROTQBYIVecInst<v2f64>;
+ def i128: ROTQBYIGenInst<i128, GPRC>;
+}
+
+defm ROTQBYI: RotateQuadByBytesImm;
+
+// See ROTQBY note above.
+class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00110011100, OOL, IOL,
+ "rotqbybi\t$rT, $rA, $shift",
+ RotShiftQuad, pattern>;
+
+class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>:
+ ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift),
+ [(set (vectype VECREG:$rT),
+ (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>;
+
+multiclass RotateQuadByBytesByBitshift {
+ def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>;
+ def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>;
+ def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>;
+ def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>;
+}
+
+defm ROTQBYBI : RotateQuadByBytesByBitshift;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// See ROTQBY note above.
+//
+// Assume that the user of this instruction knows to shift the rotate count
+// into bit 29
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQBIVecInst<ValueType vectype>:
+ ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+class ROTQBIRegInst<RegisterClass rclass>:
+ ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCount
+{
+ def v16i8: ROTQBIVecInst<v16i8>;
+ def v8i16: ROTQBIVecInst<v8i16>;
+ def v4i32: ROTQBIVecInst<v4i32>;
+ def v2i64: ROTQBIVecInst<v2i64>;
+
+ def r128: ROTQBIRegInst<GPRC>;
+ def r64: ROTQBIRegInst<R64C>;
+}
+
+defm ROTQBI: RotateQuadByBitCount;
+
+class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern yet */]>;
+
+multiclass RotateQuadByBitCountImm
+{
+ def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>;
+ def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>;
+ def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>;
+ def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>;
+
+ def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>;
+ def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>;
+}
+
+defm ROTQBII : RotateQuadByBitCountImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTHM v8i16 form:
+// NOTE(1): No vector rotate is generated by the C/C++ frontend (today),
+// so this only matches a synthetically generated/lowered code
+// fragment.
+// NOTE(2): $rB must be negated before the right rotate!
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTHMv8i16:
+ ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left
+// Note: This instruction doesn't match a pattern because rB must be negated
+// for the instruction to work. Thus, the pattern below the instruction!
+
+def ROTHMr16:
+ ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated! */]>;
+
+def : Pat<(srl R16C:$rA, R32C:$rB),
+ (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R16C:$rA, R16C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R16C:$rA, R8C:$rB),
+ (ROTHMr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>;
+
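+// Conceptually, a 16-bit logical shift right "y = x >> n" therefore selects:
+//   sfi    $cnt, $n, 0       ; $cnt = 0 - $n  (negate the shift count)
+//   rothm  $y, $x, $cnt      ; rotate right and mask
+// with xsbh/xshw extending narrower shift counts first, as the patterns
+// above show.
+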
+// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is
+// that the immediate can be complemented, so that the user doesn't have to
+// worry about it.
+
+class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+def ROTHMIv8i16:
+ ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, imm:$val)>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)),
+ (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>;
+
+def ROTHMIr16:
+ ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val),
+ [/* no pattern */]>;
+
+def: Pat<(srl R16C:$rA, (i32 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, uimm7:$val)>;
+
+def: Pat<(srl R16C:$rA, (i16 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def: Pat<(srl R16C:$rA, (i8 uimm7:$val)),
+ (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTM v4i32 form: See the ROTHM v8i16 comments.
+class ROTMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB",
+ RotShiftVec, pattern>;
+
+def ROTMv4i32:
+ ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>;
+
+def ROTMr32:
+ ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(srl R32C:$rA, R32C:$rB),
+ (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(srl R32C:$rA, R16C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(srl R32C:$rA, R8C:$rB),
+ (ROTMr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+// ROTMI v4i32 form: See the comment for ROTHM v8i16.
+def ROTMIv4i32:
+ RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v4i32 VECREG:$rT),
+ (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>;
+
+// ROTMI r32 form: knows how to complement the immediate value.
+def ROTMIr32:
+ RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val),
+ "rotmi\t$rT, $rA, $val", RotShiftVec,
+ [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>;
+
+def : Pat<(srl R32C:$rA, (i16 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(srl R32C:$rA, (i8 imm:$val)),
+ (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>;
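+
+// e.g. a 32-bit "x >> 3" matches ROTMIr32 with val = 3; the rotNeg7imm
+// operand takes care of emitting the complemented (negative) rotate count,
+// so the patterns can use the plain shift amount.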
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// ROTQMBY: This is a vector form merely so that when used in an
+// instruction pattern, type checking will succeed. This instruction assumes
+// that the user knew to negate $rB.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYVecInst<ValueType vectype>:
+ ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, $rB must be negated */]>;
+
+class ROTQMBYRegInst<RegisterClass rclass>:
+ ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytes
+{
+ def v16i8: ROTQMBYVecInst<v16i8>;
+ def v8i16: ROTQMBYVecInst<v8i16>;
+ def v4i32: ROTQMBYVecInst<v4i32>;
+ def v2i64: ROTQMBYVecInst<v2i64>;
+
+ def r128: ROTQMBYRegInst<GPRC>;
+ def r64: ROTQMBYRegInst<R64C>;
+}
+
+defm ROTQMBY : RotateQuadBytes;
+
+def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB),
+ (ROTQMBYr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0))>;
+
+class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYIVecInst<ValueType vectype>:
+ ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
+ ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+// 128-bit zero extension form:
+class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
+ ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
+multiclass RotateQuadBytesImm
+{
+ def v16i8: ROTQMBYIVecInst<v16i8>;
+ def v8i16: ROTQMBYIVecInst<v8i16>;
+ def v4i32: ROTQMBYIVecInst<v4i32>;
+ def v2i64: ROTQMBYIVecInst<v2i64>;
+
+ def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
+ def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
+
+ def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
+ def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
+ def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
+ def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
+}
+
+defm ROTQMBYI : RotateQuadBytesImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate right and mask by bit count
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBYBIVecInst<ValueType vectype>:
+ ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern, */]>;
+
+multiclass RotateMaskQuadByBitCount
+{
+ def v16i8: ROTQMBYBIVecInst<v16i8>;
+ def v8i16: ROTQMBYBIVecInst<v8i16>;
+ def v4i32: ROTQMBYBIVecInst<v4i32>;
+ def v2i64: ROTQMBYBIVecInst<v2i64>;
+ def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB),
+ [/*no pattern*/]>;
+}
+
+defm ROTQMBYBI: RotateMaskQuadByBitCount;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits
+// Note that the rotate amount has to be negated
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIVecInst<ValueType vectype>:
+ ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+class ROTQMBIRegInst<RegisterClass rclass>:
+ ROTQMBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBits
+{
+ def v16i8: ROTQMBIVecInst<v16i8>;
+ def v8i16: ROTQMBIVecInst<v8i16>;
+ def v4i32: ROTQMBIVecInst<v4i32>;
+ def v2i64: ROTQMBIVecInst<v2i64>;
+
+ def r128: ROTQMBIRegInst<GPRC>;
+ def r64: ROTQMBIRegInst<R64C>;
+}
+
+defm ROTQMBI: RotateMaskQuadByBits;
+
+def : Pat<(srl GPRC:$rA, R32C:$rB),
+ (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA,
+ (SFIr32 R32C:$rB, 0)),
+ (SFIr32 R32C:$rB, 0))>;
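+
+// A full 128-bit logical shift right is thus synthesized from ROTQMBI plus
+// ROTQMBYBI, both fed the negated shift count (a sketch):
+//   sfi        $cnt, $n, 0     ; negate the shift count
+//   rotqmbi    $t,  $a, $cnt   ; shift right by the residual bit count
+//   rotqmbybi  $rT, $t, $cnt   ; shift right by the whole bytes in the count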
+
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Rotate quad and mask by bits, immediate
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val",
+ RotShiftQuad, pattern>;
+
+class ROTQMBIIVecInst<ValueType vectype>:
+ ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+class ROTQMBIIRegInst<RegisterClass rclass>:
+ ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val),
+ [/* no pattern */]>;
+
+multiclass RotateMaskQuadByBitsImm
+{
+ def v16i8: ROTQMBIIVecInst<v16i8>;
+ def v8i16: ROTQMBIIVecInst<v8i16>;
+ def v4i32: ROTQMBIIVecInst<v4i32>;
+ def v2i64: ROTQMBIIVecInst<v2i64>;
+
+ def r128: ROTQMBIIRegInst<GPRC>;
+ def r64: ROTQMBIIRegInst<R64C>;
+}
+
+defm ROTQMBII: RotateMaskQuadByBitsImm;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def ROTMAHv8i16:
+ RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>;
+
+def ROTMAHr16:
+ RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB),
+ "rotmah\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R16C:$rA, R32C:$rB),
+ (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R16C:$rA, R16C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R16C:$rA, R8C:$rB),
+ (ROTMAHr16 R16C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+def ROTMAHIv8i16:
+ RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set (v8i16 VECREG:$rT),
+ (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)),
+ (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>;
+
+def ROTMAHIr16:
+ RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val),
+ "rotmahi\t$rT, $rA, $val", RotShiftVec,
+ [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>;
+
+def : Pat<(sra R16C:$rA, (i32 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def : Pat<(sra R16C:$rA, (i8 imm:$val)),
+ (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>;
+
+def ROTMAv4i32:
+ RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>;
+
+def ROTMAr32:
+ RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ "rotma\t$rT, $rA, $rB", RotShiftVec,
+ [/* see patterns below - $rB must be negated */]>;
+
+def : Pat<(sra R32C:$rA, R32C:$rB),
+ (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>;
+
+def : Pat<(sra R32C:$rA, R16C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 R16C:$rB), 0))>;
+
+def : Pat<(sra R32C:$rA, R8C:$rB),
+ (ROTMAr32 R32C:$rA,
+ (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>;
+
+class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011110000, OOL, IOL,
+ "rotmai\t$rT, $rA, $val",
+ RotShiftVec, pattern>;
+
+class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val),
+ [(set (vectype VECREG:$rT),
+ (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>;
+
+class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>:
+ ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val),
+ [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>;
+
+multiclass RotateMaskAlgebraicImm {
+ def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>;
+ def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>;
+ def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>;
+ def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>;
+}
+
+defm ROTMAI : RotateMaskAlgebraicImm;
+
+//===----------------------------------------------------------------------===//
+// Branch and conditionals:
+//===----------------------------------------------------------------------===//
+
+let isTerminator = 1, isBarrier = 1 in {
+ // Halt If Equal (r32 preferred slot only, no vector form)
+ def HEQr32:
+ RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB),
+ "heq\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HEQIr32 :
+ RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val),
+ "heqi\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ // HGT/HGTI: These instructions use signed arithmetic for the comparison,
+ // contrasting with HLGT/HLGTI, which use unsigned comparison:
+ def HGTr32:
+ RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HGTIr32:
+ RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTr32:
+ RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB),
+ "hlgt\t$rA, $rB", BranchResolv,
+ [/* no pattern to match */]>;
+
+ def HLGTIr32:
+ RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val),
+ "hlgti\t$rA, $val", BranchResolv,
+ [/* no pattern to match */]>;
+}
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// Comparison operators for i8, i16 and i32:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class CEQBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByte
+{
+ def v16i8 :
+ CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>;
+}
+
+class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualByteImm
+{
+ def v16i8 :
+ CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>;
+}
+
+class CEQHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfword
+{
+ def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>;
+}
+
+class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualHalfwordImm
+{
+ def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (seteq (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CEQInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWord
+{
+ def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>;
+}
+
+class CEQIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpEqualWordImm
+{
+ def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (seteq (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+class CGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByte
+{
+ def v16i8 :
+ CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>;
+}
+
+class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrByteImm
+{
+ def v16i8 :
+ CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfword
+{
+ def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>;
+}
+
+class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrHalfwordImm
+{
+ def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setgt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWord
+{
+ def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>;
+}
+
+class CGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpGtrWordImm
+{
+ def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>;
+
+ // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence:
+ def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val),
+ [/* no pattern */]>;
+}
+
+class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByte
+{
+ def v16i8 :
+ CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r8 :
+ CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB),
+ [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>;
+}
+
+class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrByteImm
+{
+ def v16i8 :
+ CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val),
+ [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA),
+ v16i8SExt8Imm:$val))]>;
+ def r8:
+ CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val),
+ [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>;
+}
+
+class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfword
+{
+ def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA),
+ (v8i16 VECREG:$rB)))]>;
+
+ def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
+ [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>;
+}
+
+class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrHalfwordImm
+{
+ def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v8i16 VECREG:$rT),
+ (setugt (v8i16 VECREG:$rA),
+ (v8i16 v8i16SExt10Imm:$val)))]>;
+ def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
+ [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>;
+}
+
+class CLGTInst<dag OOL, dag IOL, list<dag> pattern> :
+ RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWord
+{
+ def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+
+ def r32 : CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
+ [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>;
+}
+
+class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> :
+ RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val",
+ ByteOp, pattern>;
+
+multiclass CmpLGtrWordImm
+{
+ def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
+ [(set (v4i32 VECREG:$rT),
+ (setugt (v4i32 VECREG:$rA),
+ (v4i32 v4i32SExt16Imm:$val)))]>;
+
+ def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val),
+ [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>;
+}
+
+defm CEQB : CmpEqualByte;
+defm CEQBI : CmpEqualByteImm;
+defm CEQH : CmpEqualHalfword;
+defm CEQHI : CmpEqualHalfwordImm;
+defm CEQ : CmpEqualWord;
+defm CEQI : CmpEqualWordImm;
+defm CGTB : CmpGtrByte;
+defm CGTBI : CmpGtrByteImm;
+defm CGTH : CmpGtrHalfword;
+defm CGTHI : CmpGtrHalfwordImm;
+defm CGT : CmpGtrWord;
+defm CGTI : CmpGtrWordImm;
+defm CLGTB : CmpLGtrByte;
+defm CLGTBI : CmpLGtrByteImm;
+defm CLGTH : CmpLGtrHalfword;
+defm CLGTHI : CmpLGtrHalfwordImm;
+defm CLGT : CmpLGtrWord;
+defm CLGTI : CmpLGtrWordImm;
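+
+// Each of these compares writes an all-ones result (per element for the
+// vector forms, the whole register for the scalar forms) where the predicate
+// holds and zero where it does not; the setcc and select patterns below rely
+// on that convention.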
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// For SETCC primitives not supported above (setlt, setle, setge, etc.)
+// define a pattern to generate the right code, as a binary operator
+// (in a manner of speaking.)
+//
+// Notes:
+// 1. This only matches the setcc set of conditionals. Special pattern
+// matching is used for select conditionals.
+//
+// 2. The "DAG" versions of these classes are almost exclusively used for
+// i64 comparisons. See the tblgen fundamentals documentation for what
+// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern
+// class for where ResultInstrs originates.
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>;
+
+class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>:
+ Pat<(cond rclass:$rA, (inttype immpred:$imm)),
+ (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>;
+
+def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>;
+def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>;
+
+def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>;
+def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>;
+
+def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>;
+def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>;
+
+class SETCCBinOpReg<PatFrag cond, RegisterClass rclass,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, rclass:$rB),
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB))>;
+
+class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType immtype,
+ SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>:
+ Pat<(cond rclass:$rA, (immtype immpred:$imm)),
+ (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>;
+
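+// For example, (setge a, b) is built as (cgt a, b) OR (ceq a, b),
+// (setlt a, b) as the NOR of the same two compares, and (setle a, b) as the
+// complement of (cgt a, b), as the patterns below spell out.
+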
+def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>;
+def : Pat<(setle R8C:$rA, R8C:$rB),
+ (XORBIr8 (CGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setle R8C:$rA, immU8:$imm),
+ (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ ORr16, CGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>;
+def : Pat<(setle R16C:$rA, R16C:$rB),
+ (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ ORr32, CGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>;
+def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>;
+def : Pat<(setle R32C:$rA, R32C:$rB),
+ (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>;
+def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>;
+def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>;
+def : Pat<(setule R8C:$rA, R8C:$rB),
+ (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>;
+def : Pat<(setule R8C:$rA, immU8:$imm),
+ (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>;
+
+def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16,
+ ORr16, CLGTHIr16, CEQHIr16>;
+def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>;
+def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16,
+ CLGTHIr16, CEQHIr16>;
+def : Pat<(setule R16C:$rA, R16C:$rB),
+ (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>;
+def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm),
+ (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>;
+
+def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32,
+ ORr32, CLGTIr32, CEQIr32>;
+def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>;
+def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>;
+def : Pat<(setule R32C:$rA, R32C:$rB),
+ (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>;
+def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm),
+ (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// select conditional patterns:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, rclass:$rB))>;
+
+class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>:
+ Pat<(select (inttype (cond rclass:$rA, immpred:$imm)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rTrue, rclass:$rFalse,
+ (cmpare rclass:$rA, immpred:$imm))>;
+
+def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>;
+def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>;
+def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>;
+def : SELECTNegCondImm<setle, R8C, i8, immSExt8, SELBr8, CGTBr8>;
+def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>;
+def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>;
+
+def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>;
+def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>;
+def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>;
+def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>;
+def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>;
+def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>;
+
+def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>;
+def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>;
+def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>;
+def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>;
+def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>;
+def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>;
+
+class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, rclass:$rB)),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, rclass:$rB),
+ (cmpOp2 rclass:$rA, rclass:$rB)))>;
+
+class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred,
+ ValueType inttype,
+ SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1,
+ SPUInstr cmpOp2>:
+ Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))),
+ rclass:$rTrue, rclass:$rFalse),
+ (selinstr rclass:$rFalse, rclass:$rTrue,
+ (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)),
+ (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>;
+
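+// SELB selects bits from its second value operand where the control mask is
+// set (assuming the usual SPU selb bit-select semantics), so the
+// negated-condition classes above pass (rTrue, rFalse, mask) while these
+// classes pass (rFalse, rTrue, mask) with the directly computed condition.
+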
+def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16,
+ SELBr16, ORr16, CGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>;
+def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32,
+ SELBr32, ORr32, CGTIr32, CEQIr32>;
+
+def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>;
+def : SELECTBinOpImm<setuge, R8C, immSExt8, i8,
+ SELBr8, ORr8, CLGTBIr8, CEQBIr8>;
+
+def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>;
+def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16,
+ SELBr16, ORr16, CLGTHIr16, CEQHIr16>;
+
+def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>;
+def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32,
+ SELBr32, ORr32, CLGTIr32, CEQIr32>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+let isCall = 1,
+ // All calls clobber the non-callee-saved registers:
+ Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9,
+ R10,R11,R12,R13,R14,R15,R16,R17,R18,R19,
+ R20,R21,R22,R23,R24,R25,R26,R27,R28,R29,
+ R30,R31,R32,R33,R34,R35,R36,R37,R38,R39,
+ R40,R41,R42,R43,R44,R45,R46,R47,R48,R49,
+ R50,R51,R52,R53,R54,R55,R56,R57,R58,R59,
+ R60,R61,R62,R63,R64,R65,R66,R67,R68,R69,
+ R70,R71,R72,R73,R74,R75,R76,R77,R78,R79],
+ // All of these instructions use $lr (aka $0)
+ Uses = [R0] in {
+  // Branch relative and set link: Used if we actually know that the target
+  // is within [-32768, 32767] bytes of the call site
+ def BRSL:
+ BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops),
+ "brsl\t$$lr, $func",
+ [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>;
+
+ // Branch absolute and set link: Used if we actually know that the target
+ // is an absolute address
+ def BRASL:
+ BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops),
+ "brasl\t$$lr, $func",
+ [(SPUcall (SPUaform tglobaladdr:$func, 0))]>;
+
+  // Branch indirect and set link if external data. These instructions are not
+  // generated directly; they are matched via an intrinsic:
+ def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>;
+ def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>;
+
+ // Branch indirect and set link. This is the "X-form" address version of a
+ // function call
+ def BISL:
+ BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>;
+}
+
+// Support calls to external symbols:
+def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)),
+ (BRSL texternalsym:$func)>;
+
+def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
+ (BRASL texternalsym:$func)>;
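+
+// So a direct call to a nearby function "foo" selects "brsl $lr, foo", while
+// a call through a function pointer held in a register selects
+// "bisl $lr, $reg".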
+
+// Unconditional branches:
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ let isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
+
+ // Indirect branch
+ let isIndirectBranch = 1 in {
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
+ }
+
+ // Conditional branches:
+ class BRNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest",
+ BranchResolv, pattern>;
+
+ class BRNZRegInst<RegisterClass rclass>:
+ BRNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRNZVecInst<ValueType vectype>:
+ BRNZInst<(ins VECREG:$rCond, brtarget:$dest),
+ [(brcond (vectype VECREG:$rCond), bb:$dest)]>;
+
+ multiclass BranchNotZero {
+ def v4i32 : BRNZVecInst<v4i32>;
+ def r32 : BRNZRegInst<R32C>;
+ }
+
+ defm BRNZ : BranchNotZero;
+
+ class BRZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest",
+ BranchResolv, pattern>;
+
+ class BRZRegInst<RegisterClass rclass>:
+ BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRZVecInst<ValueType vectype>:
+ BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZero {
+ def v4i32: BRZVecInst<v4i32>;
+ def r32: BRZRegInst<R32C>;
+ }
+
+ defm BRZ: BranchZero;
+
+  // Note: LLVM doesn't generate conditional indirect branches. Otherwise these
+  // would be useful:
+ /*
+ class BINZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>;
+
+ class BINZRegInst<RegisterClass rclass>:
+ BINZInst<(ins rclass:$rA, brtarget:$dest),
+ [(brcond rclass:$rA, R32C:$dest)]>;
+
+ class BINZVecInst<ValueType vectype>:
+ BINZInst<(ins VECREG:$rA, R32C:$dest),
+ [(brcond (vectype VECREG:$rA), R32C:$dest)]>;
+
+ multiclass BranchNotZeroIndirect {
+ def v4i32: BINZVecInst<v4i32>;
+ def r32: BINZRegInst<R32C>;
+ }
+
+ defm BINZ: BranchNotZeroIndirect;
+
+ class BIZInst<dag IOL, list<dag> pattern>:
+ BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>;
+
+ class BIZRegInst<RegisterClass rclass>:
+ BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>;
+
+ class BIZVecInst<ValueType vectype>:
+ BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>;
+
+ multiclass BranchZeroIndirect {
+ def v4i32: BIZVecInst<v4i32>;
+ def r32: BIZRegInst<R32C>;
+ }
+
+ defm BIZ: BranchZeroIndirect;
+ */
+
+ class BRHNZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv,
+ pattern>;
+
+ class BRHNZRegInst<RegisterClass rclass>:
+ BRHNZInst<(ins rclass:$rCond, brtarget:$dest),
+ [(brcond rclass:$rCond, bb:$dest)]>;
+
+ class BRHNZVecInst<ValueType vectype>:
+ BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchNotZeroHalfword {
+ def v8i16: BRHNZVecInst<v8i16>;
+ def r16: BRHNZRegInst<R16C>;
+ }
+
+ defm BRHNZ: BranchNotZeroHalfword;
+
+ class BRHZInst<dag IOL, list<dag> pattern>:
+ RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv,
+ pattern>;
+
+ class BRHZRegInst<RegisterClass rclass>:
+ BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ class BRHZVecInst<ValueType vectype>:
+ BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>;
+
+ multiclass BranchZeroHalfword {
+ def v8i16: BRHZVecInst<v8i16>;
+ def r16: BRHZRegInst<R16C>;
+ }
+
+ defm BRHZ: BranchZeroHalfword;
+}
+
+//===----------------------------------------------------------------------===//
+// setcc and brcond patterns:
+//===----------------------------------------------------------------------===//
+
+def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest),
+ (BRHZr16 R16C:$rA, bb:$dest)>;
+def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest),
+ (BRHNZr16 R16C:$rA, bb:$dest)>;
+
+def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest),
+ (BRZr32 R32C:$rA, bb:$dest)>;
+def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest),
+ (BRNZr32 R32C:$rA, bb:$dest)>;
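+
+// i.e. a branch on (a == 0) or (a != 0) folds the compare away entirely and
+// branches on the value itself with brz/brnz (brhz/brhnz for i16).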
+
+multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>;
+defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>;
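+
+// Thus "if (a == b) goto L" on i32 becomes, conceptually:
+//   ceq   $t, $a, $b
+//   brnz  $t, L
+// and the setne form branches with brz on the same compare result.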
+
+multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>;
+defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>;
+
+multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>;
+
+multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32>
+{
+ def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>;
+
+ def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>;
+}
+
+defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>;
+defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>;
+
+multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16,
+ SPUInstr orinst32, SPUInstr brinst32>
+{
+ def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest),
+ (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val),
+ (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest),
+ (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB),
+ (CEQHr16 R16C:$rA, R16:$rB)),
+ bb:$dest)>;
+
+ def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest),
+ (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val),
+ (CEQIr32 R32C:$rA, i32ImmSExt10:$val)),
+ bb:$dest)>;
+
+ def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest),
+ (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB),
+ (CEQr32 R32C:$rA, R32C:$rB)),
+ bb:$dest)>;
+}
+
+defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>;
+defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>;
+
+let isTerminator = 1, isBarrier = 1 in {
+ let isReturn = 1 in {
+ def RET:
+ RETForm<"bi\t$$lr", [(retflag)]>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Single precision floating point instructions
+//===----------------------------------------------------------------------===//
+
+class FAInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FAVecInst<ValueType vectype>:
+ FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPAdd
+{
+ def v4f32: FAVecInst<v4f32>;
+ def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FA : SFPAdd;
+
+class FSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB",
+ SPrecFP, pattern>;
+
+class FSVecInst<ValueType vectype>:
+ FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (vectype VECREG:$rT),
+ (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>;
+
+multiclass SFPSub
+{
+ def v4f32: FSVecInst<v4f32>;
+ def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FS : SFPSub;
+
+class FMInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01100011010, OOL, IOL,
+ "fm\t$rT, $rA, $rB", SPrecFP,
+ pattern>;
+
+class FMVecInst<ValueType type>:
+ FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ [(set (type VECREG:$rT),
+ (fmul (type VECREG:$rA), (type VECREG:$rB)))]>;
+
+multiclass SFPMul
+{
+ def v4f32: FMVecInst<v4f32>;
+ def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>;
+}
+
+defm FM : SFPMul;
+
+// Floating point multiply and add
+// e.g. d = c + (a * b)
+def FMAv4f32:
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fadd (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>;
+
+def FMAf32:
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fma\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+// FP multiply and subtract
+// Subtracts value in rC from product
+// res = a * b - c
+def FMSv4f32 :
+ RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (v4f32 VECREG:$rC)))]>;
+
+def FMSf32 :
+ RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT,
+ (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>;
+
+// Floating Negative Multiply and Subtract
+// Subtracts product from value in rC
+// res = fneg(fms a b c)
+// = - (a * b - c)
+// = c - a * b
+// NOTE: subtraction order
+// fsub a b = a - b
+// fs a b = b - a?
+def FNMSf32 :
+ RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>;
+
+def FNMSv4f32 :
+ RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "fnms\t$rT, $rA, $rB, $rC", SPrecFP,
+ [(set (v4f32 VECREG:$rT),
+ (fsub (v4f32 VECREG:$rC),
+ (fmul (v4f32 VECREG:$rA),
+ (v4f32 VECREG:$rB))))]>;
+
+
+
+
+// Floating point reciprocal estimate
+
+class FRESTInst<dag OOL, dag IOL>:
+ RRForm_1<0b00110111000, OOL, IOL,
+ "frest\t$rT, $rA", SPrecFP,
+ [/* no pattern */]>;
+
+def FRESTv4f32 :
+ FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>;
+
+def FRESTf32 :
+ FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>;
+
+// Floating point interpolate (used in conjunction with reciprocal estimate)
+def FIv4f32 :
+ RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+def FIf32 :
+ RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fi\t$rT, $rA, $rB", SPrecFP,
+ [/* no pattern */]>;
+
+//--------------------------------------------------------------------------
+// Basic single precision floating point comparisons:
+//
+// Note: There is no support on SPU for single precision NaN. Consequently,
+// ordered and unordered comparisons are the same.
+//--------------------------------------------------------------------------
+
+def FCEQf32 :
+ RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fceq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setoeq R32FP:$rA, R32FP:$rB),
+ (FCEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMEQf32 :
+ RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmeq\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMEQf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCGTf32 :
+ RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>;
+
+def : Pat<(setogt R32FP:$rA, R32FP:$rB),
+ (FCGTf32 R32FP:$rA, R32FP:$rB)>;
+
+def FCMGTf32 :
+ RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB),
+ "fcmgt\t$rT, $rA, $rB", SPrecFP,
+ [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>;
+
+def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)),
+ (FCMGTf32 R32FP:$rA, R32FP:$rB)>;
+
+//--------------------------------------------------------------------------
+// Single precision floating point comparisons and SETCC equivalents:
+//--------------------------------------------------------------------------
+
+def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>;
+def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>;
+
+def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>;
+
+def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>;
+def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>;
+
+def : Pat<(setule R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+def : Pat<(setole R32FP:$rA, R32FP:$rB),
+ (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>;
+
+// FP Status and Control Register Write
+// Why isn't rT a don't care in the ISA?
+// Should we create a special RRForm_3 for this guy and zero out the rT?
+def FSCRWf32 :
+ RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA),
+ "fscrwr\t$rA", SPrecFP,
+ [/* This instruction requires an intrinsic. Note: rT is unused. */]>;
+
+// FP Status and Control Register Read
+def FSCRRf32 :
+ RRForm_2<0b01011101110, (outs R32FP:$rT), (ins),
+ "fscrrd\t$rT", SPrecFP,
+ [/* This instruction requires an intrinsic */]>;
+
+// llvm instruction space
+// How do these map onto cell instructions?
+// fdiv rA rB
+// frest rC rB # c = 1/b (both lines)
+// fi rC rB rC
+// fm rD rA rC # d = a * 1/b
+// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world
+// fma rB rB rC rD # b = b * c + d
+//                      = -(d * b - a) * c + d
+//                      = a * c - c * (a * b * c - a)
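+//                      = a * (2*c - b*c*c)
+// i.e. the fnms/fma pair applies one Newton-Raphson refinement of the
+// frest/fi reciprocal estimate to the quotient (a sketch, assuming exact
+// arithmetic).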
+
+// fcopysign (???)
+
+// Library calls:
+// These llvm instructions will actually map to library calls.
+// All that's needed, then, is to check that the appropriate library is
+// imported and do a brsl to the proper function name.
+// frem # fmod(x, y): x - (x/y) * y
+// (Note: fmod(double, double), fmodf(float, float))
+// fsqrt?
+// fsin?
+// fcos?
+// Unimplemented SPU instruction space
+// floating reciprocal absolute square root estimate (frsqest)
+
+// The following are probably just intrinsics
+// status and control register write
+// status and control register read
+
+//--------------------------------------
+// Floating Point Conversions
+// Signed conversions:
+def CSiFv4f32:
+ CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>;
+
+// Convert signed integer to floating point
+def CSiFf32 :
+ CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "csflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>;
+
+// Convert unsigned int to float
+def CUiFv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>;
+
+def CUiFf32 :
+ CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA),
+ "cuflt\t$rT, $rA, 0", SPrecFP,
+ [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>;
+
+// Convert float to unsigned int
+// Assume that scale = 0
+
+def CFUiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>;
+
+def CFUif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cfltu\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>;
+
+// Convert float to signed int
+// Assume that scale = 0
+
+def CFSiv4f32 :
+ CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>;
+
+def CFSif32 :
+ CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA),
+ "cflts\t$rT, $rA, 0", SPrecFP,
+ [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>;
+
+//===----------------------------------------------------------------------==//
+// Single<->Double precision conversions
+//===----------------------------------------------------------------------==//
+
+// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a
+// v4f32, output is v2f64--which goes in the name?)
+
+// Floating point extend single to double
+// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it
+// operates on two double-word slots (i.e. 1st and 3rd fp numbers
+// are ignored).
+def FESDvec :
+ RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>;
+
+def FESDf32 :
+ RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA),
+ "fesd\t$rT, $rA", SPrecFP,
+ [(set R64FP:$rT, (fextend R32FP:$rA))]>;
+
+// Floating point round double to single
+//def FRDSvec :
+// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA),
+// "frds\t$rT, $rA,", SPrecFP,
+// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>;
+
+def FRDSf64 :
+ RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA),
+ "frds\t$rT, $rA", SPrecFP,
+ [(set R32FP:$rT, (fround R64FP:$rA))]>;
+
+// TODO: include anyextend?
+
+//===----------------------------------------------------------------------==//
+// Double precision floating point instructions
+//===----------------------------------------------------------------------==//
+def FAf64 :
+ RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>;
+
+def FAv2f64 :
+ RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfa\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FSf64 :
+ RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>;
+
+def FSv2f64 :
+ RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfs\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMf64 :
+ RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>;
+
+def FMv2f64:
+ RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB),
+ "dfm\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>;
+
+def FMAf64:
+ RRForm<0b00111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMAv2f64:
+ RRForm<0b00111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSf64 :
+ RRForm<0b10111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FMSv2f64 :
+ RRForm<0b10111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfms\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)),
+ (v2f64 VECREG:$rC)))]>;
+
+// DFNMS: - (a * b - c)
+// - (a * b) + c => c - (a * b)
+
+class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB",
+ DPrecFP, pattern>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+class DFNMSVecInst<list<dag> pattern>:
+ DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ pattern>;
+
+class DFNMSRegInst<list<dag> pattern>:
+ DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ pattern>;
+
+multiclass DFMultiplySubtract
+{
+ def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT),
+ (fsub (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB))))]>;
+
+ def f64 : DFNMSRegInst<[(set R64FP:$rT,
+ (fsub R64FP:$rC,
+ (fmul R64FP:$rA, R64FP:$rB)))]>;
+}
+
+defm DFNMS : DFMultiplySubtract;
+
+// - (a * b + c)
+// - (a * b) - c
+def FNMAf64 :
+ RRForm<0b11111010110, (outs R64FP:$rT),
+ (ins R64FP:$rA, R64FP:$rB, R64FP:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+def FNMAv2f64 :
+ RRForm<0b11111010110, (outs VECREG:$rT),
+ (ins VECREG:$rA, VECREG:$rB, VECREG:$rC),
+ "dfnma\t$rT, $rA, $rB", DPrecFP,
+ [(set (v2f64 VECREG:$rT),
+ (fneg (fadd (v2f64 VECREG:$rC),
+ (fmul (v2f64 VECREG:$rA),
+ (v2f64 VECREG:$rB)))))]>,
+ RegConstraint<"$rC = $rT">,
+ NoEncode<"$rC">;
+
+//===----------------------------------------------------------------------==//
+// Floating point negation and absolute value
+//===----------------------------------------------------------------------==//
+
+def : Pat<(fneg (v4f32 VECREG:$rA)),
+ (XORfnegvec (v4f32 VECREG:$rA),
+ (v4f32 (ILHUv4i32 0x8000)))>;
+
+def : Pat<(fneg R32FP:$rA),
+ (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>;
+
+// Floating point absolute value
+// Note: f64 fabs is custom-selected.
+
+def : Pat<(fabs R32FP:$rA),
+ (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>;
+
+def : Pat<(fabs (v4f32 VECREG:$rA)),
+ (ANDfabsvec (v4f32 VECREG:$rA),
+ (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>;
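+// Note: ILHU writes its 16-bit immediate to the upper halfword of each word
+// (lower halfword zeroed), so (ILHUv4i32 0x8000) produces 0x80000000 per lane
+// and the XOR above flips only the sign bit; (IOHL (ILHU 0x7fff), 0xffff)
+// builds 0x7fffffff, so the AND clears it.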
+
+//===----------------------------------------------------------------------===//
+// Hint for branch instructions:
+//===----------------------------------------------------------------------===//
+def HBRA :
+ HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">;
+
+//===----------------------------------------------------------------------===//
+// Execution, Load NOP (execute NOPs belong in the even pipeline, load NOPs belong
+// in the odd pipeline)
+//===----------------------------------------------------------------------===//
+
+def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000010;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> {
+ let Pattern = [];
+
+ let Inst{0-10} = 0b10000000000;
+ let Inst{11-17} = 0;
+ let Inst{18-24} = 0;
+ let Inst{25-31} = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Bit conversions (type conversions between vector/packed types)
+// NOTE: Promotions are handled using the XS* instructions.
+//===----------------------------------------------------------------------===//
+def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>;
+
+def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>;
+
+def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>;
+
+def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>;
+
+def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
+
+def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))),
+ (COPY_TO_REGCLASS VECREG:$src, GPRC)>;
+
+def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))),
+ (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))),
+ (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))),
+ (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))),
+ (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))),
+ (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))),
+ (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>;
+
+def : Pat<(i32 (bitconvert R32FP:$rA)),
+ (COPY_TO_REGCLASS R32FP:$rA, R32C)>;
+
+def : Pat<(f32 (bitconvert R32C:$rA)),
+ (COPY_TO_REGCLASS R32C:$rA, R32FP)>;
+
+def : Pat<(i64 (bitconvert R64FP:$rA)),
+ (COPY_TO_REGCLASS R64FP:$rA, R64C)>;
+
+def : Pat<(f64 (bitconvert R64C:$rA)),
+ (COPY_TO_REGCLASS R64C:$rA, R64FP)>;
+
+
+//===----------------------------------------------------------------------===//
+// Instruction patterns:
+//===----------------------------------------------------------------------===//
+
+// General 32-bit constants:
+def : Pat<(i32 imm:$imm),
+ (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>;
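+// For example, i32 0x12345678 becomes ILHU 0x1234 (giving 0x12340000)
+// followed by IOHL 0x5678 (giving 0x12345678); HI16/LO16 extract the halves.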
+
+// Single precision float constants:
+def : Pat<(f32 fpimm:$imm),
+ (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>;
+
+// General constant 32-bit vectors
+def : Pat<(v4i32 v4i32Imm:$imm),
+ (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))),
+ (LO16_vec v4i32Imm:$imm))>;
+
+// 8-bit constants
+def : Pat<(i8 imm:$imm),
+ (ILHr8 imm:$imm)>;
+
+//===----------------------------------------------------------------------===//
+// Zero/Any/Sign extensions
+//===----------------------------------------------------------------------===//
+
+// sext 8->32: Sign extend bytes to words
+def : Pat<(sext_inreg R32C:$rSrc, i8),
+ (XSHWr32 (XSBHr32 R32C:$rSrc))>;
+
+def : Pat<(i32 (sext R8C:$rSrc)),
+ (XSHWr16 (XSBHr8 R8C:$rSrc))>;
+
+// sext 8->64: Sign extend bytes to double word
+def : Pat<(sext_inreg R64C:$rSrc, i8),
+ (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
+
+def : Pat<(i64 (sext R8C:$rSrc)),
+ (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
+
+// zext 8->16: Zero extend bytes to halfwords
+def : Pat<(i16 (zext R8C:$rSrc)),
+ (ANDHIi8i16 R8C:$rSrc, 0xff)>;
+
+// zext 8->32: Zero extend bytes to words
+def : Pat<(i32 (zext R8C:$rSrc)),
+ (ANDIi8i32 R8C:$rSrc, 0xff)>;
+
+// zext 8->64: Zero extend bytes to double words
+def : Pat<(i64 (zext R8C:$rSrc)),
+ (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32
+ (COPY_TO_REGCLASS
+ (ANDIi8i32 R8C:$rSrc,0xff), VECREG),
+ 0x4),
+ (ILv4i32 0x0),
+ (FSMBIv4i32 0x0f0f)), R64C)>;
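+// i.e., the byte is zero-extended to 32 bits, moved into a vector register,
+// rotated into place within the 64-bit preferred slot, and the other word is
+// cleared via SELB against zero before copying the result back to R64C.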
+
+// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
+def : Pat<(i16 (anyext R8C:$rSrc)),
+ (ORHIi8i16 R8C:$rSrc, 0)>;
+
+// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
+def : Pat<(i32 (anyext R8C:$rSrc)),
+ (COPY_TO_REGCLASS R8C:$rSrc, R32C)>;
+
+// sext 16->64: Sign extend halfword to double word
+def : Pat<(sext_inreg R64C:$rSrc, i16),
+ (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
+
+def : Pat<(sext R16C:$rSrc),
+ (XSWDr64 (XSHWr16 R16C:$rSrc))>;
+
+// zext 16->32: Zero extend halfwords to words
+def : Pat<(i32 (zext R16C:$rSrc)),
+ (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))),
+ (ANDIi16i32 R16C:$rSrc, 0xf)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))),
+ (ANDIi16i32 R16C:$rSrc, 0xff)>;
+
+def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))),
+ (ANDIi16i32 R16C:$rSrc, 0xfff)>;
+
+// anyext 16->32: Extend 16->32 bits, irrespective of sign
+def : Pat<(i32 (anyext R16C:$rSrc)),
+ (COPY_TO_REGCLASS R16C:$rSrc, R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Truncates:
+// These truncates are for the SPU's supported types (i8, i16, i32). i64 and
+// above are custom lowered.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(i8 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>;
+
+def : Pat<(i8 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>;
+
+def : Pat<(i8 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i8 (trunc R16C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (COPY_TO_REGCLASS R16C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>;
+
+def : Pat<(i16 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>;
+
+def : Pat<(i16 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>;
+
+def : Pat<(i16 (trunc R32C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv4i32_m32
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (COPY_TO_REGCLASS R32C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>;
+
+def : Pat<(i32 (trunc GPRC:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBgprc GPRC:$src, GPRC:$src,
+ (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>;
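+// The control word built above, 0x0c0d0e0f, makes SHUFB copy bytes 12-15 of
+// the 128-bit source (its least significant word) into the first four bytes
+// of the result, i.e. into the i32 preferred slot.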
+
+def : Pat<(i32 (trunc R64C:$src)),
+ (COPY_TO_REGCLASS
+ (SHUFBv2i64_m32
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (COPY_TO_REGCLASS R64C:$src, VECREG),
+ (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>;
+
+//===----------------------------------------------------------------------===//
+// Address generation: SPU, like PPC, has to split addresses into high and
+// low parts in order to load them into a register.
+//===----------------------------------------------------------------------===//
+
+def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>;
+def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>;
+def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>;
+def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0),
+ (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0),
+ (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0),
+ (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0),
+ (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
+
+def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
+ (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
+
+def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)),
+ (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>;
+
+def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)),
+ (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>;
+
+def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)),
+ (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
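+// These symbol expansions mirror the general i32 constant pattern above:
+// ILHUhi loads the high 16 bits of the address, IOHLlo ORs in the low 16 bits.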
+
+// Intrinsics:
+include "CellSDKIntrinsics.td"
+// Various math operator instruction sequences
+include "SPUMathInstr.td"
+// 64-bit "instructions"/support
+include "SPU64InstrInfo.td"
+// 128-bit "instructions"/support
+include "SPU128InstrInfo.td"
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp
new file mode 100644
index 000000000000..3e948d071d63
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp
@@ -0,0 +1,14 @@
+//===-- SPUMachineFunction.cpp - Private data used for CellSPU -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMachineFunction.h"
+
+using namespace llvm;
+
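+// anchor() is defined out-of-line so that SPUFunctionInfo's vtable is emitted
+// in exactly this translation unit.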
+void SPUFunctionInfo::anchor() { }
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
new file mode 100644
index 000000000000..399684bb0887
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h
@@ -0,0 +1,50 @@
+//===-- SPUMachineFunction.h - Private data used for CellSPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_MACHINE_FUNCTION_INFO_H
+#define SPU_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+/// SPUFunctionInfo - Cell SPU target-specific information for each
+/// MachineFunction
+class SPUFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+ /// UsesLR - Indicates whether LR is used in the current function.
+ ///
+ bool UsesLR;
+
+ // VarArgsFrameIndex - FrameIndex for start of varargs area.
+ int VarArgsFrameIndex;
+
+public:
+ SPUFunctionInfo(MachineFunction& MF)
+ : UsesLR(false),
+ VarArgsFrameIndex(0)
+ {}
+
+ void setUsesLR(bool U) { UsesLR = U; }
+ bool usesLR() { return UsesLR; }
+
+ int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
+ void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+};
+
+} // end of namespace llvm
+
+
+#endif
+
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
new file mode 100644
index 000000000000..9a5c3976afbe
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td
@@ -0,0 +1,97 @@
+//===-- SPUMathInstr.td - Cell SPU math operations ---------*- tablegen -*--===//
+//
+// Cell SPU math operations
+//
+// This target description file contains instruction sequences for various
+// math operations, such as vector multiplies, i32 multiply, etc., for the
+// SPU's i32, i16, i8, and corresponding vector types.
+//
+// Any resemblance to libsimdmath or the Cell SDK simdmath library is
+// purely and completely coincidental.
+//===----------------------------------------------------------------------===//
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v16i8 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
+ (ORv4i32
+ (ANDv4i32
+ (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
+ (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)),
+ (ILAv4i32 0x0000ffff)),
+ (SHLIv4i32
+ (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
+ (ROTMAIv4i32_i32 VECREG:$rB, 16)),
+ (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
+ (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
+ (FSMBIv8i16 0x2222)), 16))>;
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v8i16 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
+ (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
+ (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
+ (FSMBIv8i16 0xcccc))>;
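+// MPY forms 32-bit products of the low halfwords of each word and MPYHH of
+// the high halfwords; the SHLI plus SELB with the 0xcccc byte mask merge the
+// low 16 bits of each partial product back into its own halfword lane.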
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// v4i32, i32 multiply instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+def MPYv4i32:
+ Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
+ (Av4i32
+ (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
+ (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
+ (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;
+
+def MPYi32:
+ Pat<(mul R32C:$rA, R32C:$rB),
+ (Ar32
+ (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
+ (MPYHr32 R32C:$rB, R32C:$rA)),
+ (MPYUr32 R32C:$rA, R32C:$rB))>;
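+// This builds a 32x32->32 multiply from 16-bit multiplies: with a = aH:aL and
+// b = bH:bL, a*b mod 2^32 = ((aH*bL) + (aL*bH)) << 16 + aL*bL, which is
+// MPYH(a,b) + MPYH(b,a) + MPYU(a,b) as composed above.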
+
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+// f32, v4f32 divide instruction sequence:
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
+
+// Reciprocal estimate and interpolation
+def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
+// Division estimate
+def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
+ Interpf32.Fragment,
+ DivEstf32.Fragment)>;
+// Epsilon addition
+def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
+
+def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
+ (SELBf32_cond NRaphf32.Fragment,
+ Epsilonf32.Fragment,
+ (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
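+// Roughly: FREST/FI produce a reciprocal estimate of rB, DivEst = rA * est is
+// the initial quotient, and the FNMS/FMA pair applies one Newton-Raphson
+// correction q1 = q0 + (rA - q0*rB) * est. AI on the float's bits yields q1
+// plus one ulp; the CGTI/SELB test selects q1 plus one ulp when the remainder
+// rA - rB*(q1+1ulp) is still non-negative, otherwise q1.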
+
+// Reciprocal estimate and interpolation
+def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
+// Division estimate
+def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
+// Newton-Raphson iteration
+def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
+ (v4f32 VECREG:$rB),
+ (v4f32 VECREG:$rA)),
+ Interpv4f32.Fragment,
+ DivEstv4f32.Fragment)>;
+// Epsilon addition
+def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
+
+def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
+ (SELBv4f32_cond NRaphv4f32.Fragment,
+ Epsilonv4f32.Fragment,
+ (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
+ Epsilonv4f32.Fragment,
+ (v4f32 VECREG:$rA)), -1))>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNodes.td b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
new file mode 100644
index 000000000000..a47e9ef0167c
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td
@@ -0,0 +1,159 @@
+//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Type profiles and SelectionDAG nodes used by CellSPU
+//
+//===----------------------------------------------------------------------===//
+
+// Type profile for a call sequence
+def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>;
+
+// SPU_GenControl: Type profile for generating control words for insertions
+def SPU_GenControl : SDTypeProfile<1, 1, []>;
+def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>;
+//===----------------------------------------------------------------------===//
+// Operand constraints:
+//===----------------------------------------------------------------------===//
+
+def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
+// Operand type constraints for vector shuffle/permute operations
+def SDT_SPUshuffle : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Vector binary operator type constraints (needs a further constraint to
+// ensure that operand 0 is a vector...):
+
+def SPUVecBinop: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+
+// Trinary operators, e.g., addx, carry generate
+def SPUIntTrinaryOp : SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+// SELECT_MASK type constraints: There are several variations for the various
+// vector types (this avoids having to bit_convert all over the place.)
+def SPUselmask_type: SDTypeProfile<1, 1, [
+ SDTCisInt<1>
+]>;
+
+// SELB type constraints:
+def SPUselb_type: SDTypeProfile<1, 3, [
+ SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>;
+
+// SPU Vector shift pseudo-instruction type constraints
+def SPUvecshift_type: SDTypeProfile<1, 2, [
+ SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
+
+// "marker" type for i64 operators that need a shuffle mask
+// (i.e., uses cg or bg or another instruction that needs to
+// use shufb to get things in the right place.)
+// Op0: The result
+// Op1, 2: LHS, RHS
+// Op3: Carry-generate shuffle mask
+
+def SPUmarker_type : SDTypeProfile<1, 3, [
+ SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>;
+
+//===----------------------------------------------------------------------===//
+// Synthetic/pseudo-instructions
+//===----------------------------------------------------------------------===//
+
+// SPU CNTB:
+def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>;
+
+// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see
+// SPUISelLowering.h):
+def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>;
+
+// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only):
+def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>;
+def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>;
+def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>;
+
+def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>;
+def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>;
+
+// Vector rotate left, bits shifted out of the left are rotated in on the right
+def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT",
+ SPUvecshift_type, []>;
+
+// Vector rotate left by bytes, but the count is given in bits and the SPU
+// internally converts it to bytes (saves an instruction to mask off lower
+// three bits)
+def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS",
+ SPUvecshift_type>;
+
+// Shift entire quad left by bytes/bits. Zeros are shifted in on the right
+// SHL_BITS is the same as SHL for i128, but ISD::SHL is not implemented for i128
+def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>;
+def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>;
+def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>;
+
+// SPU form select mask for bytes, immediate
+def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>;
+
+// SPU select bits instruction
+def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>;
+
+def SDTprefslot2vec: SDTypeProfile<1, 1, []>;
+def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>;
+
+def SPU_vec_demote : SDTypeProfile<1, 1, []>;
+def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>;
+
+// Address high and low components, used for [r+r] type addressing
+def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>;
+def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>;
+
+// PC-relative address
+def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>;
+
+// A-Form local store addresses
+def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>;
+
+// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses
+def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>;
+
+// i64 markers: supplies extra operands used to generate the i64 operator
+// instruction sequences
+def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>;
+def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>;
+def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>;
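+// The extra (third) operand is the carry/borrow-generate shuffle mask (see
+// SPUmarker_type above) used by the cg/bg, addx/sfx and shufb sequences in
+// SPU64InstrInfo.td.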
+
+//===----------------------------------------------------------------------===//
+// Constraints: (taken from PPCInstrInfo.td)
+//===----------------------------------------------------------------------===//
+
+class RegConstraint<string C> {
+ string Constraints = C;
+}
+
+class NoEncode<string E> {
+ string DisableEncoding = E;
+}
+
+//===----------------------------------------------------------------------===//
+// Return (flag isn't quite what it means: the operations are flagged so that
+// instruction scheduling doesn't disassociate them.)
+//===----------------------------------------------------------------------===//
+
+def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
new file mode 100644
index 000000000000..7c58041e3b84
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp
@@ -0,0 +1,153 @@
+//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The final pass just before assembly printing. This pass is the last
+// checkpoint where nops and lnops are added to the instruction stream to
+// satisfy the dual issue requirements. The actual dual issue scheduling is
+// done (TODO: nowhere, currently)
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+ struct SPUNopFiller : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+ const InstrItineraryData *IID;
+ bool isEvenPlace; // the instruction slot (mem address) at hand is even/odd
+
+ static char ID;
+ SPUNopFiller(TargetMachine &tm)
+ : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+ IID(tm.getInstrItineraryData())
+ {
+ DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+ }
+
+ virtual const char *getPassName() const {
+ return "SPU nop/lnop Filler";
+ }
+
+ void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+ bool runOnMachineFunction(MachineFunction &F) {
+ isEvenPlace = true; //all functions get an .align 3 directive at start
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ runOnMachineBasicBlock(*FI);
+ return true; //never-ever do any more modifications, just print it!
+ }
+
+ typedef enum { none = 0, // no more instructions in this function / BB
+ pseudo = 1, // this does not get executed
+ even = 2,
+ odd = 3 } SPUOpPlace;
+ SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+ };
+ char SPUNopFiller::ID = 0;
+
+}
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  assert( isEvenPlace && "basic block starts at an odd address");
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ {
+ SPUOpPlace this_optype, next_optype;
+ MachineBasicBlock::iterator J = I;
+ J++;
+
+ this_optype = getOpPlacement( *I );
+ next_optype = none;
+ while (J!=MBB.end()){
+ next_optype = getOpPlacement( *J );
+ ++J;
+ if (next_optype != pseudo )
+ break;
+ }
+
+      // pad: odd(wrong), even(wrong), ...
+ // to: nop(corr), odd(corr), even(corr)...
+ if( isEvenPlace && this_optype == odd && next_optype == even ) {
+ DEBUG( dbgs() <<"Adding NOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+ isEvenPlace=false;
+ }
+
+    // pad: even(wrong), odd(wrong), ...
+ // to: lnop(corr), even(corr), odd(corr)...
+ else if ( !isEvenPlace && this_optype == even && next_optype == odd){
+ DEBUG( dbgs() <<"Adding LNOP before: "; );
+ DEBUG( I->dump(); );
+ BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP));
+ isEvenPlace=true;
+ }
+
+ // now go to next mem slot
+ if( this_optype != pseudo )
+ isEvenPlace = !isEvenPlace;
+
+ }
+
+  // pad basic block end
+ if( !isEvenPlace ){
+ MachineBasicBlock::iterator J = MBB.end();
+ J--;
+ if (getOpPlacement( *J ) == odd) {
+ DEBUG( dbgs() <<"Padding basic block with NOP\n"; );
+ BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP));
+ }
+ else {
+ J++;
+ DEBUG( dbgs() <<"Padding basic block with LNOP\n"; );
+ BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP));
+ }
+ isEvenPlace=true;
+ }
+}
+
+FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) {
+ return new SPUNopFiller(tm);
+}
+
+// Figure out if 'instr' is executed in the even or odd pipeline
+SPUNopFiller::SPUOpPlace
+SPUNopFiller::getOpPlacement( MachineInstr &instr ) {
+ int sc = instr.getDesc().getSchedClass();
+ const InstrStage *stage = IID->beginStage(sc);
+ unsigned FUs = stage->getUnits();
+ SPUOpPlace retval;
+
+ switch( FUs ) {
+ case 0: retval = pseudo; break;
+ case 1: retval = odd; break;
+ case 2: retval = even; break;
+    default: retval = pseudo;
+ assert( false && "got unknown FuncUnit\n");
+ break;
+ };
+ return retval;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUOperands.td b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
new file mode 100644
index 000000000000..6f8deef5530f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td
@@ -0,0 +1,664 @@
+//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instruction Operands:
+//===----------------------------------------------------------------------===//
+
+// TO_IMM32 - Convert an i8/i16 to i32.
+def TO_IMM32 : SDNodeXForm<imm, [{
+ return getI32Imm(N->getZExtValue());
+}]>;
+
+// TO_IMM16 - Convert an i8/i32 to i16.
+def TO_IMM16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16);
+}]>;
+
+
+def LO16 : SDNodeXForm<imm, [{
+ unsigned val = N->getZExtValue();
+ // Transformation function: get the low 16 bits.
+ return getI32Imm(val & 0xffff);
+}]>;
+
+def LO16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ // Transformation function: get the low 16 bit immediate from a build_vector
+ // node.
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "LO16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() & 0xffff);
+}]>;
+
+// Transform an immediate, returning the high 16 bits shifted down:
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+// Transformation function: shift the high 16 bit immediate from a build_vector
+// node into the low 16 bits, and return a 16-bit constant.
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{
+ SDValue OpVal(0, 0);
+
+ assert(N->getOpcode() == ISD::BUILD_VECTOR
+ && "HI16_vec got something other than a BUILD_VECTOR");
+
+ // Get first constant operand...
+ for (unsigned i = 0, e = N->getNumOperands();
+ OpVal.getNode() == 0 && i != e; ++i) {
+ if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+ if (OpVal.getNode() == 0)
+ OpVal = N->getOperand(i);
+ }
+
+ assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node");
+ ConstantSDNode *CN = cast<ConstantSDNode>(OpVal);
+ return getI32Imm((unsigned)CN->getZExtValue() >> 16);
+}]>;
+
+// simm7 predicate - True if the immediate fits in a 7-bit signed
+// field.
+def simm7: PatLeaf<(imm), [{
+ int sextVal = int(N->getSExtValue());
+ return (sextVal >= -64 && sextVal <= 63);
+}]>;
+
+// uimm7 predicate - True if the immediate fits in a 7-bit unsigned
+// field.
+def uimm7: PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0x7f);
+}]>;
+
+// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended
+// field.
+def immSExt8 : PatLeaf<(imm), [{
+ int Value = int(N->getSExtValue());
+ return (Value >= -(1 << 8) && Value <= (1 << 8) - 1);
+}]>;
+
+// immU8: immediate, unsigned 8-bit quantity
+def immU8 : PatLeaf<(imm), [{
+ return (N->getZExtValue() <= 0xff);
+}]>;
+
+// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'lqd'.
+def i32ImmSExt10 : PatLeaf<(imm), [{
+ return isI32IntS10Immediate(N);
+}]>;
+
+// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned
+// field. Used by RI10Form instructions like 'lqd'.
+def i32ImmUns10 : PatLeaf<(imm), [{
+ return isI32IntU10Immediate(N);
+}]>;
+
+// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign
+// extended field. Used by RI10Form instructions like 'lqd'.
+def i16ImmSExt10 : PatLeaf<(imm), [{
+ return isI16IntS10Immediate(N);
+}]>;
+
+// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned
+// value. Used by RI10Form instructions.
+def i16ImmUns10 : PatLeaf<(imm), [{
+ return isI16IntU10Immediate(N);
+}]>;
+
+def immSExt16 : PatLeaf<(imm), [{
+ // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
+ // field.
+ short Ignored;
+ return isIntS16Immediate(N, Ignored);
+}]>;
+
+def immZExt16 : PatLeaf<(imm), [{
+ // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
+ // field.
+ return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
+}], LO16>;
+
+def immU16 : PatLeaf<(imm), [{
+ // immU16 predicate- True if the immediate fits into a 16-bit unsigned field.
+ return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff);
+}]>;
+
+def imm18 : PatLeaf<(imm), [{
+ // imm18 predicate: True if the immediate fits into an 18-bit unsigned field.
+ int Value = (int) N->getZExtValue();
+ return isUInt<18>(Value);
+}]>;
+
+def lo16 : PatLeaf<(imm), [{
+  // lo16 predicate - returns true if the immediate fits entirely in the low
+  // 16 bits (all zeros in the high order bits) and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = N->getZExtValue();
+ return ((val & 0x0000ffff) == val);
+ }
+
+ return false;
+}], LO16>;
+
+def hi16 : PatLeaf<(imm), [{
+ // hi16 predicate - returns true if the immediate has all zeros in the
+ // low order bits and is a 32-bit constant:
+ if (N->getValueType(0) == MVT::i32) {
+ uint32_t val = uint32_t(N->getZExtValue());
+ return ((val & 0xffff0000) == val);
+ } else if (N->getValueType(0) == MVT::i64) {
+ uint64_t val = N->getZExtValue();
+ return ((val & 0xffff0000ULL) == val);
+ }
+
+ return false;
+}], HI16>;
+
+def bitshift : PatLeaf<(imm), [{
+ // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII
+ // (shift left quadword by bits immediate)
+ int64_t Val = N->getZExtValue();
+ return (Val > 0 && Val <= 7);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Floating point operands:
+//===----------------------------------------------------------------------===//
+
+// Transform a float, returning the high 16 bits shifted down, as if
+// the float was really an unsigned integer:
+def HI16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) >> 16);
+}]>;
+
+// Transformation function on floats: get the low 16 bits as if the float was
+// an unsigned integer.
+def LO16_f32 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & 0xffff);
+}]>;
+
+def FPimm_sext16 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16));
+}]>;
+
+def FPimm_u18 : SDNodeXForm<fpimm, [{
+ float fval = N->getValueAPF().convertToFloat();
+ return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1));
+}]>;
+
+def fpimmSExt16 : PatLeaf<(fpimm), [{
+ short Ignored;
+ return isFPS16Immediate(N, Ignored);
+}], FPimm_sext16>;
+
+// Does the SFP constant only have the upper 16 bits set?
+def hi16_f32 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t val = FloatToBits(N->getValueAPF().convertToFloat());
+ return ((val & 0xffff0000) == val);
+ }
+
+ return false;
+}], HI16_f32>;
+
+// Does the SFP constant fit into 18 bits?
+def fpimm18 : PatLeaf<(fpimm), [{
+ if (N->getValueType(0) == MVT::f32) {
+ uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat());
+ return isUInt<18>(Value);
+ }
+
+ return false;
+}], FPimm_u18>;
+
+//===----------------------------------------------------------------------===//
+// 64-bit operands (TODO):
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// build_vector operands:
+//===----------------------------------------------------------------------===//
+
+// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8SExt8Imm_xform>;
+
+// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit
+// immediate constant load for v16i8 vectors. N.B.: The incoming constant has
+// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a).
+def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8);
+}]>;
+
+// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant
+// load, works in conjunction with its transform function. N.B.: This relies the
+// incoming constant being a 16-bit quantity, where the upper and lower bytes
+// are EXACTLY the same (e.g., 0x2a2a)
+def v16i8U8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0;
+}], v16i8U8Imm_xform>;
+
+// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt8Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt8Imm_xform>;
+
+// v8i16SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v8i16 vectors.
+def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16SExt10Imm_xform>;
+
+// v8i16Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v8i16 vectors.
+def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v8i16Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns10Imm_xform>;
+
+// v8i16Uns16Imm_xform function: convert build_vector to a 16-bit immediate
+// constant load for v8i16 vectors.
+def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16);
+}]>;
+
+// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v8i16SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0;
+}], v8i16Uns16Imm_xform>;
+
+// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt10Imm_xform>;
+
+// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant
+// load, works in conjunction with its transform function.
+def v4i32Uns10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns10Imm_xform>;
+
+// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v4i32 vectors.
+def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v4i32SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32SExt16Imm_xform>;
+
+// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v4i32 vectors.
+def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32);
+}]>;
+
+// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v4i32Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], v4i32Uns18Imm_xform>;
+
+// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant
+// load.
+def ILHUvec_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32);
+}]>;
+
+/// immILHUvec: Predicate test for an ILHU constant vector.
+def immILHUvec: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i32 vector constants
+def v4i32_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v4i32_imm(N, *CurDAG);
+}]>;
+
+def v4i32Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0;
+}], v4i32_get_imm>;
+
+// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt10Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt10Imm_xform>;
+
+// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended
+// immediate constant load for v2i64 vectors.
+def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant
+// load, works in conjunction with its transform function.
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64SExt16Imm_xform>;
+
+// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned
+// immediate constant load for v2i64 vectors.
+def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64);
+}]>;
+
+// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load,
+// works in conjunction with its transform function.
+def v2i64Uns18Imm: PatLeaf<(build_vector), [{
+ return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], v2i64Uns18Imm_xform>;
+
+/// immILHUvec_i64: Predicate test for an ILHU constant vector (i64 variant).
+def immILHUvec_i64: PatLeaf<(build_vector), [{
+ return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0;
+}], ILHUvec_get_imm>;
+
+// Catch-all for any other i64 vector constants
+def v2i64_get_imm: SDNodeXForm<build_vector, [{
+ return SPU::get_v2i64_imm(N, *CurDAG);
+}]>;
+
+def v2i64Imm: PatLeaf<(build_vector), [{
+ return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0;
+}], v2i64_get_imm>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+
+def s7imm: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def s7imm_i8: Operand<i8> {
+ let PrintMethod = "printS7ImmOperand";
+}
+
+def u7imm: Operand<i16> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i8: Operand<i8> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+def u7imm_i32: Operand<i32> {
+ let PrintMethod = "printU7ImmOperand";
+}
+
+// Halfword, signed 10-bit constant
+def s10imm : Operand<i16> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i8: Operand<i8> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i32: Operand<i32> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+def s10imm_i64: Operand<i64> {
+ let PrintMethod = "printS10ImmOperand";
+}
+
+// Unsigned 10-bit integers:
+def u10imm: Operand<i16> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i8: Operand<i8> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def u10imm_i32: Operand<i32> {
+ let PrintMethod = "printU10ImmOperand";
+}
+
+def s16imm : Operand<i16> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i8: Operand<i8> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i32: Operand<i32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_i64: Operand<i64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f32: Operand<f32> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def s16imm_f64: Operand<f64> {
+ let PrintMethod = "printS16ImmOperand";
+}
+
+def u16imm_i64 : Operand<i64> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm_i32 : Operand<i32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def f16imm : Operand<f32> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def s18imm : Operand<i32> {
+ let PrintMethod = "printS18ImmOperand";
+}
+
+def u18imm : Operand<i32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def u18imm_i64 : Operand<i64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm : Operand<f32> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+def f18imm_f64 : Operand<f64> {
+ let PrintMethod = "printU18ImmOperand";
+}
+
+// Negated 7-bit halfword rotate immediate operands
+def rothNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+def rothNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTHNeg7Imm";
+}
+
+// Negated 7-bit word rotate immediate operands
+def rotNeg7imm : Operand<i32> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i16 : Operand<i16> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def rotNeg7imm_i8 : Operand<i8> {
+ let PrintMethod = "printROTNeg7Imm";
+}
+
+def target : Operand<OtherVT> {
+ let PrintMethod = "printBranchOperand";
+}
+
+// Absolute address call target
+def calltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops u18imm:$calldest);
+}
+
+// PC relative call target
+def relcalltarget : Operand<iPTR> {
+ let PrintMethod = "printPCRelativeOperand";
+ let MIOperandInfo = (ops s16imm:$calldest);
+}
+
+// Branch targets:
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "printPCRelativeOperand";
+}
+
+// Hint for branch target
+def hbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printHBROperand";
+}
+
+// Indirect call target
+def indcalltarget : Operand<iPTR> {
+ let PrintMethod = "printCallOperand";
+ let MIOperandInfo = (ops ptr_rc:$calldest);
+}
+
+def symbolHi: Operand<i32> {
+ let PrintMethod = "printSymbolHi";
+}
+
+def symbolLo: Operand<i32> {
+ let PrintMethod = "printSymbolLo";
+}
+
+def symbolLSA: Operand<i32> {
+ let PrintMethod = "printSymbolLSA";
+}
+
+// Shuffle address memory operand [s7imm(reg) d-format]
+def shufaddr : Operand<iPTR> {
+ let PrintMethod = "printShufAddr";
+ let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg);
+}
+
+// memory s10imm(reg) operand
+def dformaddr : Operand<iPTR> {
+ let PrintMethod = "printDFormAddr";
+ let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg);
+}
+
+// 256K local store address
+// N.B.: The tblgen code generator expects to have two operands, an offset
+// and a pointer. Of these, only the immediate is actually used.
+def addr256k : Operand<iPTR> {
+ let PrintMethod = "printAddr256K";
+ let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg);
+}
+
+// memory s18imm(reg) operand
+def memri18 : Operand<iPTR> {
+ let PrintMethod = "printMemRegImmS18";
+ let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg);
+}
+
+// memory register + register operand
+def memrr : Operand<iPTR> {
+ let PrintMethod = "printMemRegReg";
+ let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b);
+}
+
+// Define SPU-specific addressing modes: These come in three basic
+// flavors:
+//
+// D-form : [r+I10] (10-bit signed offset + reg)
+// X-form : [r+r] (reg+reg)
+// A-form : abs (256K LSA offset)
+// D-form(2): [r+I7] (7-bit signed offset + reg)
+
+def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr",
+ [], [SDNPWantRoot]>;
+def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr",
+ [], [SDNPWantRoot]>;
+def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr",
+ [], [SDNPWantRoot]>;
+def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr",
+ [], [SDNPWantRoot]>;
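+// For example, a load at a small immediate offset from a base register (a
+// stack slot, say) selects the D-form pattern, a computed reg+reg address
+// selects X-form, and a symbol resolved directly in the 256K local store (via
+// the ILA/ILAlsa patterns) selects A-form.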
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
new file mode 100644
index 000000000000..1b2da5f50c81
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -0,0 +1,356 @@
+//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell implementation of the TargetRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "reginfo"
+#include "SPURegisterInfo.h"
+#include "SPU.h"
+#include "SPUInstrBuilder.h"
+#include "SPUSubtarget.h"
+#include "SPUMachineFunction.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// SPU::R14, return the number that it corresponds to (e.g. 14).
+unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ using namespace SPU;
+ switch (RegEnum) {
+ case SPU::R0: return 0;
+ case SPU::R1: return 1;
+ case SPU::R2: return 2;
+ case SPU::R3: return 3;
+ case SPU::R4: return 4;
+ case SPU::R5: return 5;
+ case SPU::R6: return 6;
+ case SPU::R7: return 7;
+ case SPU::R8: return 8;
+ case SPU::R9: return 9;
+ case SPU::R10: return 10;
+ case SPU::R11: return 11;
+ case SPU::R12: return 12;
+ case SPU::R13: return 13;
+ case SPU::R14: return 14;
+ case SPU::R15: return 15;
+ case SPU::R16: return 16;
+ case SPU::R17: return 17;
+ case SPU::R18: return 18;
+ case SPU::R19: return 19;
+ case SPU::R20: return 20;
+ case SPU::R21: return 21;
+ case SPU::R22: return 22;
+ case SPU::R23: return 23;
+ case SPU::R24: return 24;
+ case SPU::R25: return 25;
+ case SPU::R26: return 26;
+ case SPU::R27: return 27;
+ case SPU::R28: return 28;
+ case SPU::R29: return 29;
+ case SPU::R30: return 30;
+ case SPU::R31: return 31;
+ case SPU::R32: return 32;
+ case SPU::R33: return 33;
+ case SPU::R34: return 34;
+ case SPU::R35: return 35;
+ case SPU::R36: return 36;
+ case SPU::R37: return 37;
+ case SPU::R38: return 38;
+ case SPU::R39: return 39;
+ case SPU::R40: return 40;
+ case SPU::R41: return 41;
+ case SPU::R42: return 42;
+ case SPU::R43: return 43;
+ case SPU::R44: return 44;
+ case SPU::R45: return 45;
+ case SPU::R46: return 46;
+ case SPU::R47: return 47;
+ case SPU::R48: return 48;
+ case SPU::R49: return 49;
+ case SPU::R50: return 50;
+ case SPU::R51: return 51;
+ case SPU::R52: return 52;
+ case SPU::R53: return 53;
+ case SPU::R54: return 54;
+ case SPU::R55: return 55;
+ case SPU::R56: return 56;
+ case SPU::R57: return 57;
+ case SPU::R58: return 58;
+ case SPU::R59: return 59;
+ case SPU::R60: return 60;
+ case SPU::R61: return 61;
+ case SPU::R62: return 62;
+ case SPU::R63: return 63;
+ case SPU::R64: return 64;
+ case SPU::R65: return 65;
+ case SPU::R66: return 66;
+ case SPU::R67: return 67;
+ case SPU::R68: return 68;
+ case SPU::R69: return 69;
+ case SPU::R70: return 70;
+ case SPU::R71: return 71;
+ case SPU::R72: return 72;
+ case SPU::R73: return 73;
+ case SPU::R74: return 74;
+ case SPU::R75: return 75;
+ case SPU::R76: return 76;
+ case SPU::R77: return 77;
+ case SPU::R78: return 78;
+ case SPU::R79: return 79;
+ case SPU::R80: return 80;
+ case SPU::R81: return 81;
+ case SPU::R82: return 82;
+ case SPU::R83: return 83;
+ case SPU::R84: return 84;
+ case SPU::R85: return 85;
+ case SPU::R86: return 86;
+ case SPU::R87: return 87;
+ case SPU::R88: return 88;
+ case SPU::R89: return 89;
+ case SPU::R90: return 90;
+ case SPU::R91: return 91;
+ case SPU::R92: return 92;
+ case SPU::R93: return 93;
+ case SPU::R94: return 94;
+ case SPU::R95: return 95;
+ case SPU::R96: return 96;
+ case SPU::R97: return 97;
+ case SPU::R98: return 98;
+ case SPU::R99: return 99;
+ case SPU::R100: return 100;
+ case SPU::R101: return 101;
+ case SPU::R102: return 102;
+ case SPU::R103: return 103;
+ case SPU::R104: return 104;
+ case SPU::R105: return 105;
+ case SPU::R106: return 106;
+ case SPU::R107: return 107;
+ case SPU::R108: return 108;
+ case SPU::R109: return 109;
+ case SPU::R110: return 110;
+ case SPU::R111: return 111;
+ case SPU::R112: return 112;
+ case SPU::R113: return 113;
+ case SPU::R114: return 114;
+ case SPU::R115: return 115;
+ case SPU::R116: return 116;
+ case SPU::R117: return 117;
+ case SPU::R118: return 118;
+ case SPU::R119: return 119;
+ case SPU::R120: return 120;
+ case SPU::R121: return 121;
+ case SPU::R122: return 122;
+ case SPU::R123: return 123;
+ case SPU::R124: return 124;
+ case SPU::R125: return 125;
+ case SPU::R126: return 126;
+ case SPU::R127: return 127;
+ default:
+ report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering");
+ }
+}
+
+SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
+ const TargetInstrInfo &tii) :
+ SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii)
+{
+}
+
+/// getPointerRegClass - Return the register class to use to hold pointers.
+/// This is used for addressing modes.
+const TargetRegisterClass *
+SPURegisterInfo::getPointerRegClass(unsigned Kind) const {
+ return &SPU::R32CRegClass;
+}
+
+const uint16_t *
+SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
+{
+ // Cell ABI calling convention
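+  // The list covers the SPU ABI's non-volatile registers ($80-$127), plus the
+  // three special registers $0 ($lr), $1 ($sp) and $2 (environment pointer).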
+ static const uint16_t SPU_CalleeSaveRegs[] = {
+ SPU::R80, SPU::R81, SPU::R82, SPU::R83,
+ SPU::R84, SPU::R85, SPU::R86, SPU::R87,
+ SPU::R88, SPU::R89, SPU::R90, SPU::R91,
+ SPU::R92, SPU::R93, SPU::R94, SPU::R95,
+ SPU::R96, SPU::R97, SPU::R98, SPU::R99,
+ SPU::R100, SPU::R101, SPU::R102, SPU::R103,
+ SPU::R104, SPU::R105, SPU::R106, SPU::R107,
+ SPU::R108, SPU::R109, SPU::R110, SPU::R111,
+ SPU::R112, SPU::R113, SPU::R114, SPU::R115,
+ SPU::R116, SPU::R117, SPU::R118, SPU::R119,
+ SPU::R120, SPU::R121, SPU::R122, SPU::R123,
+ SPU::R124, SPU::R125, SPU::R126, SPU::R127,
+ SPU::R2, /* environment pointer */
+ SPU::R1, /* stack pointer */
+ SPU::R0, /* link register */
+ 0 /* end */
+ };
+
+ return SPU_CalleeSaveRegs;
+}
+
+/*!
+ R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
+ generally unused) are the Cell's reserved registers
+ */
+BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(SPU::R0); // LR
+ Reserved.set(SPU::R1); // SP
+ Reserved.set(SPU::R2); // environment pointer
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+// Stack Frame Processing methods
+//===----------------------------------------------------------------------===//
+
+//--------------------------------------------------------------------------
+void
+SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I)
+ const
+{
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+void
+SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS) const
+{
+ unsigned i = 0;
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = II->getDebugLoc();
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+
+ MachineOperand &SPOp = MI.getOperand(i);
+ int FrameIndex = SPOp.getIndex();
+
+ // Now add the frame object offset to the offset from r1.
+ int Offset = MFI->getObjectOffset(FrameIndex);
+
+ // Most instructions, except for generated FrameIndex additions using AIr32
+ // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the
+ // immediate in operand 2.
+ unsigned OpNo = 1;
+ if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32)
+ OpNo = 2;
+
+ MachineOperand &MO = MI.getOperand(OpNo);
+
+ // Offset is biased by $lr's slot at the bottom.
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize();
+ assert((Offset & 0xf) == 0
+ && "16-byte alignment violated in eliminateFrameIndex");
+
+ // Replace the FrameIndex with base register with $sp (aka $r1)
+ SPOp.ChangeToRegister(SPU::R1, false);
+
+  // If 'Offset' doesn't fit in the D-form instruction's immediate field,
+  // convert the instruction to X-form. If the instruction is not an AI
+  // (which takes an s10 immediate), assume it is a load/store that can
+  // take an s14 immediate.
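+  // Illustrative example (not taken from the ISA manual): a D-form
+  // "lqd $3, off($sp)" whose offset overflows the immediate field is
+  // rewritten as "il $tmp, off" followed by the X-form "lqx $3, $tmp, $sp",
+  // which is what the code below emits via ILr32 and convertDFormToXForm().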
+ if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
+ || !isInt<14>(Offset)) {
+ int newOpcode = convertDFormToXForm(MI.getOpcode());
+ unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
+ BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg )
+ .addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
+ .addReg(tmpReg, RegState::Kill)
+ .addReg(SPU::R1);
+ // remove the replaced D-form instruction
+ MBB.erase(II);
+ } else {
+ MO.ChangeToImmediate(Offset);
+ }
+}
+
+unsigned
+SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
+{
+ return SPU::R1;
+}
+
+int
+SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
+{
+ switch(dFormOpcode)
+ {
+ case SPU::AIr32: return SPU::Ar32;
+ case SPU::LQDr32: return SPU::LQXr32;
+ case SPU::LQDr128: return SPU::LQXr128;
+ case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4i32: return SPU::LQXv4i32;
+ case SPU::LQDv4f32: return SPU::LQXv4f32;
+ case SPU::STQDr32: return SPU::STQXr32;
+ case SPU::STQDr128: return SPU::STQXr128;
+ case SPU::STQDv16i8: return SPU::STQXv16i8;
+ case SPU::STQDv4i32: return SPU::STQXv4i32;
+ case SPU::STQDv4f32: return SPU::STQXv4f32;
+
+ default: assert( false && "Unhandled D to X-form conversion");
+ }
+ // default will assert, but need to return something to keep the
+ // compiler happy.
+ return dFormOpcode;
+}
+
+// TODO: this was copied from the PPC backend. Could this convenience function
+// be moved to the RegScavenger class?
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const
+{
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+ assert( Reg && "Register scavenger failed");
+ return Reg;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
new file mode 100644
index 000000000000..e5ab22422502
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h
@@ -0,0 +1,101 @@
+//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Cell SPU implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTERINFO_H
+#define SPU_REGISTERINFO_H
+
+#include "SPU.h"
+
+#define GET_REGINFO_HEADER
+#include "SPUGenRegisterInfo.inc"
+
+namespace llvm {
+ class SPUSubtarget;
+ class TargetInstrInfo;
+ class Type;
+
+ class SPURegisterInfo : public SPUGenRegisterInfo {
+ private:
+ const SPUSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ //! Predicate: Does the machine function use the link register?
+ bool usesLR(MachineFunction &MF) const;
+
+ public:
+ SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+ //! Translate a register's enum value to a register number
+ /*!
+      This method translates a register's enum value to its register number,
+ e.g. SPU::R14 -> 14.
+ */
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// getPointerRegClass - Return the register class to use to hold pointers.
+ /// This is used for addressing modes.
+ virtual const TargetRegisterClass *
+ getPointerRegClass(unsigned Kind = 0) const;
+
+    /// After allocating this many registers, the allocator should feel
+    /// register pressure. The value is a somewhat arbitrary guess, based on
+    /// the number of non-callee-saved registers in the C calling convention.
+    virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                         MachineFunction &MF) const {
+ return 50;
+ }
+
+ //! Return the array of callee-saved registers
+ virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+ //! Allow for scavenging, so we can get scratch registers when needed.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+ { return true; }
+
+ //! Return the reserved registers
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ //! Eliminate the call frame setup pseudo-instructions
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+    //! Convert frame indices into machine operands
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ RegScavenger *RS = NULL) const;
+
+ //! Get the stack frame register (SP, aka R1)
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ //------------------------------------------------------------------------
+ // New methods added:
+ //------------------------------------------------------------------------
+
+ //! Convert D-form load/store to X-form load/store
+ /*!
+      Converts a register-displacement load/store into a register-indexed
+      load/store for large stack frames, when the stack frame exceeds the
+      range of an s10 displacement.
+ */
+ int convertDFormToXForm(int dFormOpcode) const;
+
+ //! Acquire an unused register in an emergency.
+ unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const;
+
+ };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 000000000000..f27b042edd63
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,183 @@
+//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+class SPUReg<string n> : Register<n> {
+ let Namespace = "SPU";
+}
+
+// The SPU's registers are all 128 bits wide, which makes specifying them
+// relatively easy, if relatively mundane:
+
+class SPUVecReg<bits<7> num, string n> : SPUReg<n> {
+ field bits<7> Num = num;
+}
+
+def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>;
+def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>;
+def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>;
+def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>;
+def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>;
+def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>;
+def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>;
+def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>;
+def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>;
+def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>;
+def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>;
+def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>;
+def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>;
+def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>;
+def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>;
+def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>;
+def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>;
+def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>;
+def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>;
+def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>;
+def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>;
+def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>;
+def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>;
+def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>;
+def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>;
+def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>;
+def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>;
+def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>;
+def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>;
+def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>;
+def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>;
+def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>;
+def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>;
+def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>;
+def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>;
+def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>;
+def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>;
+def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>;
+def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>;
+def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>;
+def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>;
+def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>;
+def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>;
+def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>;
+def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>;
+def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>;
+def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>;
+def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>;
+def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>;
+def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>;
+def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>;
+def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>;
+def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>;
+def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>;
+def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>;
+def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>;
+def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>;
+def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>;
+def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>;
+def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>;
+def R60 : SPUVecReg<60, "$60">, DwarfRegNum<[60]>;
+def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>;
+def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>;
+def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>;
+def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>;
+def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>;
+def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>;
+def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>;
+def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>;
+def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>;
+def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>;
+def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>;
+def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>;
+def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>;
+def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>;
+def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>;
+def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>;
+def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>;
+def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>;
+def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>;
+def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>;
+def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>;
+def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>;
+def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>;
+def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>;
+def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>;
+def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>;
+def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>;
+def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>;
+def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>;
+def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>;
+def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>;
+def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>;
+def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>;
+def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>;
+def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>;
+def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>;
+def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>;
+def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>;
+def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>;
+def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>;
+def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>;
+def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>;
+def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>;
+def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>;
+def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>;
+def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>;
+def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>;
+def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>;
+def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>;
+def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>;
+def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>;
+def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>;
+def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>;
+def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>;
+def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>;
+def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>;
+def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>;
+def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>;
+def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>;
+def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>;
+def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>;
+def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>;
+def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>;
+def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>;
+def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>;
+def R126 : SPUVecReg<126, "$126">, DwarfRegNum<[126]>;
+def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
+
+/* Need floating point status register here: */
+/* def FPCSR : ... */
+
+// The SPU's registers are 128-bit wide entities that can function as general
+// purpose registers, with scalar operands held in the "preferred slot".
+// The non-volatile registers are allocated in reverse order, as the PPC
+// backend does.
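+// (For scalar values the "preferred slot" is the left-most portion of the
+// 128-bit register; for a 32-bit word this is bytes 0-3, per the SPU ABI.)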
+def GPRC : RegisterClass<"SPU", [i128], 128,
+ (add (sequence "R%u", 0, 79),
+ (sequence "R%u", 127, 80))>;
+
+// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
+def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
+
+// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
+def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
+
+// The SPU's registers as 32-bit wide (word) "preferred slot":
+def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
+
+// The SPU's registers as single precision floating point "preferred slot":
+def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
+
+// The SPU's registers as 16-bit wide (halfword) "preferred slot":
+def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
+
+// The SPU's registers as 8-bit wide (byte) "preferred slot":
+def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
+
+// The SPU's registers as vector registers:
+def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
+ (add GPRC)>;
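+
+// Note that every register class above covers the same 128 physical
+// registers; only the value type carried in the preferred slot differs.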
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
new file mode 100644
index 000000000000..e557ed340a28
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h
@@ -0,0 +1,19 @@
+//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_REGISTER_NAMES_H
+#define SPU_REGISTER_NAMES_H
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
new file mode 100644
index 000000000000..9ccd0844e48e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
@@ -0,0 +1,59 @@
+//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Even pipeline:
+
+def EVEN_UNIT : FuncUnit;   // Even execution unit: (PC & 0x7) == 0b000
+def ODD_UNIT  : FuncUnit;   // Odd execution unit:  (PC & 0x7) == 0b100
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore : InstrItinClass; // ODD_UNIT
+def BranchHints : InstrItinClass; // ODD_UNIT
+def BranchResolv : InstrItinClass; // ODD_UNIT
+def ChanOpSPR : InstrItinClass; // ODD_UNIT
+def ShuffleOp : InstrItinClass; // ODD_UNIT
+def SelectOp : InstrItinClass; // ODD_UNIT
+def GatherOp : InstrItinClass; // ODD_UNIT
+def LoadNOP : InstrItinClass; // ODD_UNIT
+def ExecNOP : InstrItinClass; // EVEN_UNIT
+def SPrecFP : InstrItinClass; // EVEN_UNIT
+def DPrecFP : InstrItinClass; // EVEN_UNIT
+def FPInt : InstrItinClass; // EVEN_UNIT (FP<->integer)
+def ByteOp : InstrItinClass; // EVEN_UNIT
+def IntegerOp : InstrItinClass; // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass; // EVEN_UNIT
+def RotShiftVec : InstrItinClass; // EVEN_UNIT Inter vector
+def RotShiftQuad : InstrItinClass; // ODD_UNIT Entire quad
+def ImmLoad : InstrItinClass; // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
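+/* The InstrStage cycle counts below are rough per-class latency estimates
+   (e.g. 6 cycles for loads/stores, 13 for double-precision FP), not exact
+   hardware figures. */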
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
+ InstrItinData<LoadStore , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchHints , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<BranchResolv, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<ChanOpSPR , [InstrStage<6, [ODD_UNIT]>]>,
+ InstrItinData<ShuffleOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<SelectOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<GatherOp , [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<LoadNOP , [InstrStage<1, [ODD_UNIT]>]>,
+ InstrItinData<ExecNOP , [InstrStage<1, [EVEN_UNIT]>]>,
+ InstrItinData<SPrecFP , [InstrStage<6, [EVEN_UNIT]>]>,
+ InstrItinData<DPrecFP , [InstrStage<13, [EVEN_UNIT]>]>,
+ InstrItinData<FPInt , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<ByteOp , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<IntegerOp , [InstrStage<2, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftVec , [InstrStage<4, [EVEN_UNIT]>]>,
+ InstrItinData<RotShiftQuad, [InstrStage<4, [ODD_UNIT]>]>,
+ InstrItinData<IntegerMulDiv,[InstrStage<7, [EVEN_UNIT]>]>,
+ InstrItinData<ImmLoad , [InstrStage<2, [EVEN_UNIT]>]>
+ ]>;
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..5732fd43cdc2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cellspu-selectiondag-info"
+#include "SPUTargetMachine.h"
+using namespace llvm;
+
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
+}
+
+SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
new file mode 100644
index 000000000000..39257d92c400
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CellSPU subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSPUSELECTIONDAGINFO_H
+#define CELLSPUSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SPUTargetMachine;
+
+class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+ explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
+ ~SPUSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 000000000000..eec2d250be7f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,65 @@
+//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPUGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS) :
+ SPUGenSubtargetInfo(TT, CPU, FS),
+ StackAlignment(16),
+ ProcDirective(SPU::DEFAULT_PROC),
+ UseLargeMem(false)
+{
+  // This should be the target SPU processor type. For now, since there's
+  // only one, simply use the "v0" default:
+ std::string default_cpu("v0");
+
+ // Parse features string.
+ ParseSubtargetFeatures(default_cpu, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  // CriticalPathRCs is the set of register classes for which anti-dependence
+  // breaking is performed. Do it for all register classes.
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&SPU::R8CRegClass);
+ CriticalPathRCs.push_back(&SPU::R16CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32CRegClass);
+ CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+ CriticalPathRCs.push_back(&SPU::R64CRegClass);
+ CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+ return OptLevel >= CodeGenOpt::Default;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 000000000000..7c4aa1430217
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,97 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SPUGenSubtargetInfo.inc"
+
+namespace llvm {
+ class GlobalValue;
+ class StringRef;
+
+ namespace SPU {
+ enum {
+ PROC_NONE,
+ DEFAULT_PROC
+ };
+ }
+
+ class SPUSubtarget : public SPUGenSubtargetInfo {
+ protected:
+    /// StackAlignment - The minimum alignment known to hold for the stack
+    /// frame on entry to the function, which must be maintained by every
+    /// function.
+ unsigned StackAlignment;
+
+ /// Selected instruction itineraries (one entry per itinerary class.)
+ InstrItineraryData InstrItins;
+
+ /// Which SPU processor (this isn't really used, but it's there to keep
+ /// the C compiler happy)
+ unsigned ProcDirective;
+
+    /// Use (assume) large memory -- effectively disables the LQA/STQA
+    /// instructions that assume a 256K local store.
+ bool UseLargeMem;
+
+ public:
+    /// This constructor initializes the data members to match those
+    /// of the specified triple.
+ ///
+ SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
+
+    /// ParseSubtargetFeatures - Parses the features string, setting the
+    /// specified subtarget options. The definition of this function is
+    /// auto-generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// SetJITMode - This is called to inform the subtarget info that we are
+ /// producing code for the JIT.
+ void SetJITMode();
+
+    /// getStackAlignment - Returns the minimum alignment known to hold for
+    /// the stack frame on entry to the function, which must be maintained by
+    /// every function for this subtarget.
+ unsigned getStackAlignment() const { return StackAlignment; }
+
+    /// getInstrItineraryData - Return the instruction itineraries based on
+    /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const {
+ return InstrItins;
+ }
+
+ /// Use large memory addressing predicate
+ bool usingLargeMem() const {
+ return UseLargeMem;
+ }
+
+ /// getTargetDataString - Return the pointer size and type alignment
+ /// properties of this subtarget.
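+    /// An informal reading of the string: "E" selects big-endian byte order,
+    /// "p:32:32:128" describes 32-bit pointers, and the trailing ":128"
+    /// alignment fields prefer 128-bit (quadword) alignment to match the
+    /// SPU's 16-byte registers and local-store accesses.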
+ const char *getTargetDataString() const {
+ return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
+ "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
+ "-s:128:128-n32:64";
+ }
+
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
+ };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 000000000000..21f6b25bf256
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,93 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUTargetMachine.h"
+#include "SPU.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeCellSPUTarget() {
+ // Register the target.
+ RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
+}
+
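+// The only fixed callee-save spill slot is the link register's; LR here is
+// presumably the static spill-slot table declared by SPUFrameLowering.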
+const std::pair<unsigned, int> *
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+ NumEntries = 1;
+ return &LR[0];
+}
+
+SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS),
+ DataLayout(Subtarget.getTargetDataString()),
+ InstrInfo(*this),
+ FrameLowering(Subtarget),
+ TLInfo(*this),
+ TSInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// SPU Code Generator Pass Configuration Options.
+class SPUPassConfig : public TargetPassConfig {
+public:
+ SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM)
+ : TargetPassConfig(TM, PM) {}
+
+ SPUTargetMachine &getSPUTargetMachine() const {
+ return getTM<SPUTargetMachine>();
+ }
+
+ virtual bool addInstSelector();
+ virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new SPUPassConfig(this, PM);
+}
+
+bool SPUPassConfig::addInstSelector() {
+ // Install an instruction selector.
+ PM.add(createSPUISelDag(getSPUTargetMachine()));
+ return false;
+}
+
+// Passes to run just before printing the assembly.
+bool SPUPassConfig::addPreEmitPass() {
+  // Load the TCE instruction scheduler, if it is available via loaded
+  // plugins.
+ typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
+ BuilderFunc schedulerCreator =
+ (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+ "createTCESchedulerPass");
+ if (schedulerCreator != NULL)
+ PM.add(schedulerCreator("cellspu"));
+
+  // Align instructions with nops/lnops for dual issue.
+ PM.add(createSPUNopFillerPass(getSPUTargetMachine()));
+ return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
new file mode 100644
index 000000000000..3e5d38c919c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
@@ -0,0 +1,87 @@
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_TARGETMACHINE_H
+#define SPU_TARGETMACHINE_H
+
+#include "SPUSubtarget.h"
+#include "SPUInstrInfo.h"
+#include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+/// SPUTargetMachine
+///
+class SPUTargetMachine : public LLVMTargetMachine {
+ SPUSubtarget Subtarget;
+ const TargetData DataLayout;
+ SPUInstrInfo InstrInfo;
+ SPUFrameLowering FrameLowering;
+ SPUTargetLowering TLInfo;
+ SPUSelectionDAGInfo TSInfo;
+ InstrItineraryData InstrItins;
+public:
+ SPUTargetMachine(const Target &T, StringRef TT,
+ StringRef CPU, StringRef FS, const TargetOptions &Options,
+ Reloc::Model RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ /// Return the subtarget implementation object
+ virtual const SPUSubtarget *getSubtargetImpl() const {
+ return &Subtarget;
+ }
+ virtual const SPUInstrInfo *getInstrInfo() const {
+ return &InstrInfo;
+ }
+ virtual const SPUFrameLowering *getFrameLowering() const {
+ return &FrameLowering;
+ }
+ /*!
+ \note Cell SPU does not support JIT today. It could support JIT at some
+ point.
+ */
+ virtual TargetJITInfo *getJITInfo() {
+ return NULL;
+ }
+
+ virtual const SPUTargetLowering *getTargetLowering() const {
+ return &TLInfo;
+ }
+
+ virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
+ virtual const SPURegisterInfo *getRegisterInfo() const {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ virtual const TargetData *getTargetData() const {
+ return &DataLayout;
+ }
+
+ virtual const InstrItineraryData *getInstrItineraryData() const {
+ return &InstrItins;
+ }
+
+ // Pass Pipeline Configuration
+ virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
new file mode 100644
index 000000000000..84aadfad6f8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCellSPUTarget;
+
+extern "C" void LLVMInitializeCellSPUTargetInfo() {
+ RegisterTarget<Triple::cellspu>
+ X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
+}