Diffstat (limited to 'contrib/llvm/lib/Target/CellSPU')
41 files changed, 14400 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td new file mode 100644 index 000000000000..cdb4099ffbca --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/CellSDKIntrinsics.td @@ -0,0 +1,449 @@ +//===-- CellSDKIntrinsics.td - Cell SDK Intrinsics ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +///--==-- Arithmetic ops intrinsics --==-- +def CellSDKah: + RR_Int_v8i16<0b00010011000, "ah", IntegerOp, int_spu_si_ah>; +def CellSDKahi: + RI10_Int_v8i16<0b00010011000, "ahi", IntegerOp, int_spu_si_ahi>; +def CellSDKa: + RR_Int_v4i32<0b00000011000, "a", IntegerOp, int_spu_si_a>; +def CellSDKai: + RI10_Int_v4i32<0b00111000, "ai", IntegerOp, int_spu_si_ai>; +def CellSDKsfh: + RR_Int_v8i16<0b00010010000, "sfh", IntegerOp, int_spu_si_sfh>; +def CellSDKsfhi: + RI10_Int_v8i16<0b10110000, "sfhi", IntegerOp, int_spu_si_sfhi>; +def CellSDKsf: + RR_Int_v4i32<0b00000010000, "sf", IntegerOp, int_spu_si_sf>; +def CellSDKsfi: + RI10_Int_v4i32<0b00110000, "sfi", IntegerOp, int_spu_si_sfi>; +def CellSDKaddx: + RR_Int_v4i32<0b00000010110, "addx", IntegerOp, int_spu_si_addx>; +def CellSDKcg: + RR_Int_v4i32<0b0100001100, "cg", IntegerOp, int_spu_si_cg>; +def CellSDKcgx: + RR_Int_v4i32<0b01000010110, "cgx", IntegerOp, int_spu_si_cgx>; +def CellSDKsfx: + RR_Int_v4i32<0b10000010110, "sfx", IntegerOp, int_spu_si_sfx>; +def CellSDKbg: + RR_Int_v4i32<0b01000010000, "bg", IntegerOp, int_spu_si_bg>; +def CellSDKbgx: + RR_Int_v4i32<0b11000010110, "bgx", IntegerOp, int_spu_si_bgx>; + +def CellSDKmpy: + RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpy $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpy (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyu: + RRForm<0b00110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyu $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyu (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))] >; + +def CellSDKmpyi: + RI10Form<0b00101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "mpyi $rT, $rA, $val", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyi (v8i16 VECREG:$rA), + i16ImmSExt10:$val))]>; + +def CellSDKmpyui: + RI10Form<0b10101110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "mpyui $rT, $rA, $val", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyui (v8i16 VECREG:$rA), + i16ImmSExt10:$val))]>; + +def CellSDKmpya: + RRRForm<0b0011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "mpya $rT, $rA, $rB, $rC", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpya (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB), + (v8i16 VECREG:$rC)))]>; + +def CellSDKmpyh: + RRForm<0b10100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyh $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyh (v4i32 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpys: + RRForm<0b11100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpys $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpys (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyhh: + RRForm<0b01100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhh $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhh (v8i16 
VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyhha: + RRForm<0b01100010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhha $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhha (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +// Not sure how to match a (set $rT, (add $rT (mpyhh $rA, $rB)))... so leave +// as an intrinsic for the time being +def CellSDKmpyhhu: + RRForm<0b01110011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhhu $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhu (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKmpyhhau: + RRForm<0b01110010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpyhhau $rT, $rA, $rB", IntegerMulDiv, + [(set (v4i32 VECREG:$rT), (int_spu_si_mpyhhau (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def CellSDKand: + RRForm<0b1000011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "and\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_and (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKandc: + RRForm<0b10000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "andc\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_andc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKandbi: + RI10Form<0b01101000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "andbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_andbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKandhi: + RI10Form<0b10101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "andhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_andhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKandi: + RI10Form<0b00101000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "andi\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_andi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKor: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "or\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_or (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKorc: + RRForm<0b10010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "addc\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_orc (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKorbi: + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "orbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_orbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKorhi: + RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "orhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_orhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKori: + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ori\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_ori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKxor: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "xor\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_xor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKxorbi: + RI10Form<0b01100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "xorbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), (int_spu_si_xorbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKxorhi: + RI10Form<0b10100000, (outs 
VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "xorhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_xorhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKxori: + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "xori\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_xori (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKnor: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "nor\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_nor (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKnand: + RRForm<0b10000010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "nand\t $rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), + (int_spu_si_nand (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +//===----------------------------------------------------------------------===// +// Shift/rotate intrinsics: +//===----------------------------------------------------------------------===// + +def CellSDKshli: + Pat<(int_spu_si_shli (v4i32 VECREG:$rA), uimm7:$val), + (SHLIv4i32 VECREG:$rA, (TO_IMM32 imm:$val))>; + +def CellSDKshlqbi: + Pat<(int_spu_si_shlqbi VECREG:$rA, R32C:$rB), + (SHLQBIv16i8 VECREG:$rA, R32C:$rB)>; + +def CellSDKshlqii: + Pat<(int_spu_si_shlqbii VECREG:$rA, uimm7:$val), + (SHLQBIIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>; + +def CellSDKshlqby: + Pat<(int_spu_si_shlqby VECREG:$rA, R32C:$rB), + (SHLQBYv16i8 VECREG:$rA, R32C:$rB)>; + +def CellSDKshlqbyi: + Pat<(int_spu_si_shlqbyi VECREG:$rA, uimm7:$val), + (SHLQBYIv16i8 VECREG:$rA, (TO_IMM32 imm:$val))>; + + +//===----------------------------------------------------------------------===// +// Branch/compare intrinsics: +//===----------------------------------------------------------------------===// + +def CellSDKceq: + RRForm<0b00000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ceq\t $rT, $rA, $rB", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_ceq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKceqi: + RI10Form<0b00111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ceqi\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_ceqi (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKceqb: + RRForm<0b00001011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ceqb\t $rT, $rA, $rB", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_ceqb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; + +def CellSDKceqbi: + RI10Form<0b01111110, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "ceqbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), (int_spu_si_ceqbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKceqh: + RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ceqh\t $rT, $rA, $rB", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_ceqh (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; + +def CellSDKceqhi: + RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ceqhi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_ceqhi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; +def CellSDKcgth: + RRForm<0b00010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "cgth\t $rT, $rA, $rB", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_cgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; + +def CellSDKcgthi: + RI10Form<0b10111110, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "cgthi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + 
(int_spu_si_cgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKcgt: + RRForm<0b00000010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "cgt\t $rT, $rA, $rB", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_cgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKcgti: + RI10Form<0b00110010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "cgti\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_cgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKcgtb: + RRForm<0b00001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "cgtb\t $rT, $rA, $rB", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_cgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; + +def CellSDKcgtbi: + RI10Form<0b01110010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "cgtbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), (int_spu_si_cgtbi (v16i8 VECREG:$rA), immU8:$val))]>; + +def CellSDKclgth: + RRForm<0b00010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "clgth\t $rT, $rA, $rB", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_clgth (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)))]>; + +def CellSDKclgthi: + RI10Form<0b10111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "clgthi\t $rT, $rA, $val", BranchResolv, + [(set (v8i16 VECREG:$rT), + (int_spu_si_clgthi (v8i16 VECREG:$rA), i16ImmSExt10:$val))]>; + +def CellSDKclgt: + RRForm<0b00000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "clgt\t $rT, $rA, $rB", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_clgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + +def CellSDKclgti: + RI10Form<0b00111010, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "clgti\t $rT, $rA, $val", BranchResolv, + [(set (v4i32 VECREG:$rT), + (int_spu_si_clgti (v4i32 VECREG:$rA), i32ImmSExt10:$val))]>; + +def CellSDKclgtb: + RRForm<0b00001011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "clgtb\t $rT, $rA, $rB", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_clgtb (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)))]>; + +def CellSDKclgtbi: + RI10Form<0b01111010, (outs VECREG:$rT), (ins VECREG:$rA, u10imm_i8:$val), + "clgtbi\t $rT, $rA, $val", BranchResolv, + [(set (v16i8 VECREG:$rT), + (int_spu_si_clgtbi (v16i8 VECREG:$rA), immU8:$val))]>; + +//===----------------------------------------------------------------------===// +// Floating-point intrinsics: +//===----------------------------------------------------------------------===// + +def CellSDKfa: + RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fa\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fa (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfs: + RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fs\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fs (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfm: + RRForm<0b01100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fm\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fm (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfceq: + RRForm<0b01000011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fceq\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fceq (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfcgt: + RRForm<0b01000011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fcgt\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fcgt (v4f32 
VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfcmeq: + RRForm<0b01010011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fcmeq\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fcmeq (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfcmgt: + RRForm<0b01010011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fcmgt\t $rT, $rA, $rB", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fcmgt (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB)))]>; + +def CellSDKfma: + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fma\t $rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fma (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rC)))]>; + +def CellSDKfnms: + RRRForm<0b1011, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fnms\t $rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fnms (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rC)))]>; + +def CellSDKfms: + RRRForm<0b1111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fms\t $rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), (int_spu_si_fms (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rC)))]>; + +//===----------------------------------------------------------------------===// +// Double precision floating-point intrinsics: +//===----------------------------------------------------------------------===// + +def CellSDKdfa: + RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfa\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfa (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfs: + RRForm<0b10110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfs\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfs (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfm: + RRForm<0b01110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfm\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfm (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfma: + RRForm<0b00111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfma\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfma (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfnma: + RRForm<0b11111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfnma\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfnma (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfnms: + RRForm<0b01111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfnms\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfnms (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; + +def CellSDKdfms: + RRForm<0b10111010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfms\t $rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (int_spu_si_dfms (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))]>; diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp new file mode 100644 index 000000000000..4bad37eacaf7 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp @@ -0,0 +1,43 @@ +//===-- SPUMCAsmInfo.cpp - Cell SPU asm properties ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of the SPUMCAsmInfo properties. +// +//===----------------------------------------------------------------------===// + +#include "SPUMCAsmInfo.h" +using namespace llvm; + +void SPULinuxMCAsmInfo::anchor() { } + +SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) { + IsLittleEndian = false; + + ZeroDirective = "\t.space\t"; + Data64bitsDirective = "\t.quad\t"; + AlignmentIsInBytes = false; + + PCSymbol = "."; + CommentString = "#"; + GlobalPrefix = ""; + PrivateGlobalPrefix = ".L"; + + // Has leb128 + HasLEB128 = true; + + SupportsDebugInformation = true; + + // Exception handling is not supported on CellSPU (think about it: you only + // have 256K for code+data. Would you support exception handling?) + ExceptionsType = ExceptionHandling::None; + + // SPU assembly requires ".section" before ".bss" + UsesELFSectionDirectiveForBSS = true; +} + diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h new file mode 100644 index 000000000000..f786147b9267 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h @@ -0,0 +1,30 @@ +//===-- SPUMCAsmInfo.h - Cell SPU asm properties ---------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the SPUMCAsmInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPUTARGETASMINFO_H +#define SPUTARGETASMINFO_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/MC/MCAsmInfo.h" + +namespace llvm { + class Target; + + class SPULinuxMCAsmInfo : public MCAsmInfo { + virtual void anchor(); + public: + explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT); + }; +} // namespace llvm + +#endif /* SPUTARGETASMINFO_H */ diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp new file mode 100644 index 000000000000..8450e2c6634c --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp @@ -0,0 +1,94 @@ +//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Cell SPU specific target descriptions. 
+// +//===----------------------------------------------------------------------===// + +#include "SPUMCTargetDesc.h" +#include "SPUMCAsmInfo.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/MC/MCCodeGenInfo.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" + +#define GET_INSTRINFO_MC_DESC +#include "SPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_MC_DESC +#include "SPUGenSubtargetInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "SPUGenRegisterInfo.inc" + +using namespace llvm; + +static MCInstrInfo *createSPUMCInstrInfo() { + MCInstrInfo *X = new MCInstrInfo(); + InitSPUMCInstrInfo(X); + return X; +} + +static MCRegisterInfo *createCellSPUMCRegisterInfo(StringRef TT) { + MCRegisterInfo *X = new MCRegisterInfo(); + InitSPUMCRegisterInfo(X, SPU::R0); + return X; +} + +static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU, + StringRef FS) { + MCSubtargetInfo *X = new MCSubtargetInfo(); + InitSPUMCSubtargetInfo(X, TT, CPU, FS); + return X; +} + +static MCAsmInfo *createSPUMCAsmInfo(const Target &T, StringRef TT) { + MCAsmInfo *MAI = new SPULinuxMCAsmInfo(T, TT); + + // Initial state of the frame pointer is R1. + MachineLocation Dst(MachineLocation::VirtualFP); + MachineLocation Src(SPU::R1, 0); + MAI->addInitialFrameState(0, Dst, Src); + + return MAI; +} + +static MCCodeGenInfo *createSPUMCCodeGenInfo(StringRef TT, Reloc::Model RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) { + MCCodeGenInfo *X = new MCCodeGenInfo(); + // For the time being, use static relocations, since there's really no + // support for PIC yet. + X->InitMCCodeGenInfo(Reloc::Static, CM, OL); + return X; +} + +// Force static initialization. +extern "C" void LLVMInitializeCellSPUTargetMC() { + // Register the MC asm info. + RegisterMCAsmInfoFn X(TheCellSPUTarget, createSPUMCAsmInfo); + + // Register the MC codegen info. + TargetRegistry::RegisterMCCodeGenInfo(TheCellSPUTarget, + createSPUMCCodeGenInfo); + + // Register the MC instruction info. + TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo); + + // Register the MC register info. + TargetRegistry::RegisterMCRegInfo(TheCellSPUTarget, + createCellSPUMCRegisterInfo); + + // Register the MC subtarget info. + TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget, + createSPUMCSubtargetInfo); +} diff --git a/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h new file mode 100644 index 000000000000..d26449e8908f --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h @@ -0,0 +1,38 @@ +//===-- SPUMCTargetDesc.h - CellSPU Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides CellSPU specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef SPUMCTARGETDESC_H +#define SPUMCTARGETDESC_H + +namespace llvm { +class Target; + +extern Target TheCellSPUTarget; + +} // End llvm namespace + +// Define symbolic names for Cell registers. This defines a mapping from +// register name to register number. 
+// +#define GET_REGINFO_ENUM +#include "SPUGenRegisterInfo.inc" + +// Defines symbolic names for the SPU instructions. +// +#define GET_INSTRINFO_ENUM +#include "SPUGenInstrInfo.inc" + +#define GET_SUBTARGETINFO_ENUM +#include "SPUGenSubtargetInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.h b/contrib/llvm/lib/Target/CellSPU/SPU.h new file mode 100644 index 000000000000..c660131706cb --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPU.h @@ -0,0 +1,31 @@ +//===-- SPU.h - Top-level interface for Cell SPU Target ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in the LLVM +// Cell SPU back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_IBMCELLSPU_H +#define LLVM_TARGET_IBMCELLSPU_H + +#include "MCTargetDesc/SPUMCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class SPUTargetMachine; + class FunctionPass; + class formatted_raw_ostream; + + FunctionPass *createSPUISelDag(SPUTargetMachine &TM); + FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm); + +} + +#endif /* LLVM_TARGET_IBMCELLSPU_H */ diff --git a/contrib/llvm/lib/Target/CellSPU/SPU.td b/contrib/llvm/lib/Target/CellSPU/SPU.td new file mode 100644 index 000000000000..e835b9cac8e1 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPU.td @@ -0,0 +1,66 @@ +//===-- SPU.td - Describe the STI Cell SPU Target Machine --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is the top level entry point for the STI Cell SPU target machine. +// +//===----------------------------------------------------------------------===// + +// Get the target-independent interfaces which we are implementing. +// +include "llvm/Target/Target.td" + +// Holder of code fragments (you'd think this'd already be in +// a td file somewhere... 
:-) + +class CodeFrag<dag frag> { + dag Fragment = frag; +} + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "SPURegisterInfo.td" + +//===----------------------------------------------------------------------===// +// Instruction formats, instructions +//===----------------------------------------------------------------------===// + +include "SPUNodes.td" +include "SPUOperands.td" +include "SPUSchedule.td" +include "SPUInstrFormats.td" +include "SPUInstrInfo.td" + +//===----------------------------------------------------------------------===// +// Subtarget features: +//===----------------------------------------------------------------------===// + +def DefaultProc: SubtargetFeature<"", "ProcDirective", "SPU::DEFAULT_PROC", "">; +def LargeMemFeature: + SubtargetFeature<"large_mem","UseLargeMem", "true", + "Use large (>256) LSA memory addressing [default = false]">; + +def SPURev0 : Processor<"v0", SPUItineraries, [DefaultProc]>; + +//===----------------------------------------------------------------------===// +// Calling convention: +//===----------------------------------------------------------------------===// + +include "SPUCallingConv.td" + +// Target: + +def SPUInstrInfo : InstrInfo { + let isLittleEndianEncoding = 1; +} + +def SPU : Target { + let InstructionSet = SPUInstrInfo; +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td new file mode 100644 index 000000000000..e051e047333a --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPU128InstrInfo.td @@ -0,0 +1,41 @@ +//===-- SPU128InstrInfo.td - Cell SPU 128-bit operations --*- tablegen -*--===// +// +// Cell SPU 128-bit operations +// +//===----------------------------------------------------------------------===// + +// zext 32->128: Zero extend 32-bit to 128-bit +def : Pat<(i128 (zext R32C:$rSrc)), + (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; + +// zext 64->128: Zero extend 64-bit to 128-bit +def : Pat<(i128 (zext R64C:$rSrc)), + (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>; + +// zext 16->128: Zero extend 16-bit to 128-bit +def : Pat<(i128 (zext R16C:$rSrc)), + (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>; + +// zext 8->128: Zero extend 8-bit to 128-bit +def : Pat<(i128 (zext R8C:$rSrc)), + (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; + +// anyext 32->128: Zero extend 32-bit to 128-bit +def : Pat<(i128 (anyext R32C:$rSrc)), + (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>; + +// anyext 64->128: Zero extend 64-bit to 128-bit +def : Pat<(i128 (anyext R64C:$rSrc)), + (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>; + +// anyext 16->128: Zero extend 16-bit to 128-bit +def : Pat<(i128 (anyext R16C:$rSrc)), + (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>; + +// anyext 8->128: Zero extend 8-bit to 128-bit +def : Pat<(i128 (anyext R8C:$rSrc)), + (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xf), 12)>; + +// Shift left +def : Pat<(shl GPRC:$rA, R32C:$rB), + (SHLQBYBIr128 (SHLQBIr128 GPRC:$rA, R32C:$rB), R32C:$rB)>; diff --git a/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td new file mode 100644 index 000000000000..bea33b5362d2 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPU64InstrInfo.td @@ -0,0 +1,408 @@ +//====-- SPU64InstrInfo.td - Cell SPU 64-bit operations ---*- tablegen -*--===// +// +// Cell SPU 64-bit 
operations +// +//===----------------------------------------------------------------------===// + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// 64-bit comparisons: +// +// 1. The instruction sequences for vector vice scalar differ by a +// constant. In the scalar case, we're only interested in the +// top two 32-bit slots, whereas we're interested in an exact +// all-four-slot match in the vector case. +// +// 2. There are no "immediate" forms, since loading 64-bit constants +// could be a constant pool load. +// +// 3. i64 setcc results are i32, which are subsequently converted to a FSM +// mask when used in a select pattern. +// +// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO) +// [Note: this may be moot, since gb produces v4i32 or r32.] +// +// 5. The code sequences for r64 and v2i64 are probably overly conservative, +// compared to the code that gcc produces. +// +// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!) +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +// selb instruction definition for i64. Note that the selection mask is +// a vector, produced by various forms of FSM: +def SELBr64_cond: + SELBInst<(outs R64C:$rT), (ins R64C:$rA, R64C:$rB, VECREG:$rC), + [/* no pattern */]>; + +// The generic i64 select pattern, which assumes that the comparison result +// is in a 32-bit register that contains a select mask pattern (i.e., gather +// bits result): + +def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue), + (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>; + +// select the negative condition: +class I64SELECTNegCond<PatFrag cond, CodeFrag compare>: + Pat<(select (i32 (cond R64C:$rA, R64C:$rB)), R64C:$rTrue, R64C:$rFalse), + (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 compare.Fragment))>; + +// setcc the negative condition: +class I64SETCCNegCond<PatFrag cond, CodeFrag compare>: + Pat<(cond R64C:$rA, R64C:$rB), + (XORIr32 compare.Fragment, -1)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// The i64 seteq fragment that does the scalar->vector conversion and +// comparison: +def CEQr64compare: + CodeFrag<(CGTIv4i32 (GBv4i32 (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))), 0xb)>; + +// The i64 seteq fragment that does the vector comparison +def CEQv2i64compare: + CodeFrag<(CEQIv4i32 (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)), 0xf)>; + +// i64 seteq (equality): the setcc result is i32, which is converted to a +// vector FSM mask when used in a select pattern. 
+// +// v2i64 seteq (equality): the setcc result is v4i32 +multiclass CompareEqual64 { + // Plain old comparison, converts back to i32 scalar + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>; + def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>; + + // SELB mask from FSM: + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CEQr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CEQv2i64compare.Fragment), R32C))>; +} + +defm I64EQ: CompareEqual64; + +def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>; +def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>; + +// i64 setne: +def : I64SETCCNegCond<setne, I64EQr64>; +def : I64SELECTNegCond<setne, I64EQr64>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// i64 setugt/setule: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def CLGTr64ugt: + CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; + +def CLGTr64eq: + CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; + +def CLGTr64compare: + CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment, + (XSWDv2i64 CLGTr64ugt.Fragment), + CLGTr64eq.Fragment)>; + +def CLGTv2i64ugt: + CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>; + +def CLGTv2i64eq: + CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>; + +def CLGTv2i64compare: + CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment, + (XSWDv2i64 CLGTr64ugt.Fragment), + CLGTv2i64eq.Fragment)>; + +multiclass CompareLogicalGreaterThan64 { + // Plain old comparison, converts back to i32 scalar + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>; + def v2i64: CodeFrag<CLGTv2i64compare.Fragment>; + + // SELB mask from FSM: + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGTr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>; +} + +defm I64LGT: CompareLogicalGreaterThan64; + +def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>; +//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64LGTv2i64.Fragment>; + +// i64 setult: +def : I64SETCCNegCond<setule, I64LGTr64>; +def : I64SELECTNegCond<setule, I64LGTr64>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// i64 setuge/setult: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def CLGEr64compare: + CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CLGTr64ugt.Fragment, + CLGTr64eq.Fragment)), 0xb)>; + +def CLGEv2i64compare: + CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment, + CLGTv2i64eq.Fragment)), 0xf)>; + +multiclass CompareLogicalGreaterEqual64 { + // Plain old comparison, converts back to i32 scalar + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>; + def v2i64: CodeFrag<CLGEv2i64compare.Fragment>; + + // SELB mask from FSM: + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGEr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>; +} + +defm I64LGE: CompareLogicalGreaterEqual64; + +def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>; +def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))), + I64LGEv2i64.Fragment>; + + +// i64 setult: +def : I64SETCCNegCond<setult, I64LGEr64>; +def : I64SELECTNegCond<setult, I64LGEr64>; + 
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// i64 setgt/setle: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def CGTr64sgt: + CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; + +def CGTr64eq: + CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG))>; + +def CGTr64compare: + CodeFrag<(SELBv2i64 CGTr64sgt.Fragment, + (XSWDv2i64 CGTr64sgt.Fragment), + CGTr64eq.Fragment)>; + +def CGTv2i64sgt: + CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>; + +def CGTv2i64eq: + CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>; + +def CGTv2i64compare: + CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment, + (XSWDv2i64 CGTr64sgt.Fragment), + CGTv2i64eq.Fragment)>; + +multiclass CompareGreaterThan64 { + // Plain old comparison, converts back to i32 scalar + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>; + def v2i64: CodeFrag<CGTv2i64compare.Fragment>; + + // SELB mask from FSM: + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CGTr64compare.Fragment), R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS + (FSMv4i32 CGTv2i64compare.Fragment), R32C))>; +} + +defm I64GT: CompareLogicalGreaterThan64; + +def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>; +//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), +// I64GTv2i64.Fragment>; + +// i64 setult: +def : I64SETCCNegCond<setle, I64GTr64>; +def : I64SELECTNegCond<setle, I64GTr64>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// i64 setge/setlt: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def CGEr64compare: + CodeFrag<(CGTIv4i32 (GBv4i32 (ORv4i32 CGTr64sgt.Fragment, + CGTr64eq.Fragment)), 0xb)>; + +def CGEv2i64compare: + CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment, + CGTv2i64eq.Fragment)), 0xf)>; + +multiclass CompareGreaterEqual64 { + // Plain old comparison, converts back to i32 scalar + def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>; + def v2i64: CodeFrag<CGEv2i64compare.Fragment>; + + // SELB mask from FSM: + def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>; + def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>; +} + +defm I64GE: CompareGreaterEqual64; + +def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>; +def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))), + I64GEv2i64.Fragment>; + +// i64 setult: +def : I64SETCCNegCond<setlt, I64GEr64>; +def : I64SELECTNegCond<setlt, I64GEr64>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v2i64, i64 add +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class v2i64_add_cg<dag lhs, dag rhs>: + CodeFrag<(CGv4i32 lhs, rhs)>; + +class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>: + CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>; + +class v2i64_add<dag lhs, dag rhs, dag cg_mask>: + v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>; + +def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), + (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; + +def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)), + v2i64_add<(v2i64 VECREG:$rA), + (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)>.Fragment>; + 
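The SPUadd64 patterns above assemble a 64-bit add from 32-bit word operations: cg produces the carry out of the low word and addx folds it into the high word (the actual pattern moves the carry into position with SHUFB and the $rCGmask operand). A scalar C++ sketch of that decomposition; illustrative only, not code from this backend:

#include <cstdint>
#include <cassert>

// 64-bit add built from a 32-bit add plus carry-generate/add-extended,
// modeling the v2i64_add_cg / v2i64_add_1 fragments on one element.
std::uint64_t add64_via_cg_addx(std::uint64_t a, std::uint64_t b) {
  std::uint32_t aHi = std::uint32_t(a >> 32), aLo = std::uint32_t(a);
  std::uint32_t bHi = std::uint32_t(b >> 32), bLo = std::uint32_t(b);
  std::uint32_t lo    = aLo + bLo;                // A: plain 32-bit add, low slot
  std::uint32_t carry = (lo < aLo) ? 1u : 0u;     // CG: carry out of the low slot
  std::uint32_t hi    = aHi + bHi + carry;        // ADDX: add with the carried-in bit
  return (std::uint64_t(hi) << 32) | lo;
}

int main() {
  assert(add64_via_cg_addx(0xFFFFFFFFULL, 1) == 0x100000000ULL);
  assert(add64_via_cg_addx(0x123456789ABCDEF0ULL, 0x0FEDCBA987654321ULL) ==
         0x123456789ABCDEF0ULL + 0x0FEDCBA987654321ULL);
  return 0;
}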
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v2i64, i64 subtraction +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>; + +class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>: + CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>; + +def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), + (COPY_TO_REGCLASS + v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment, + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; + +def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)), + v2i64_sub<(v2i64 VECREG:$rA), + (v2i64 VECREG:$rB), + v2i64_sub_bg<(v2i64 VECREG:$rA), + (v2i64 VECREG:$rB)>.Fragment, + (v4i32 VECREG:$rCGmask)>.Fragment>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v2i64, i64 multiply +// +// Note: i64 multiply is simply the vector->scalar conversion of the +// full-on v2i64 multiply, since the entire vector has to be manipulated +// anyway. +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class v2i64_mul_ahi64<dag rA> : + CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; + +class v2i64_mul_bhi64<dag rB> : + CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>; + +class v2i64_mul_alo64<dag rB> : + CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; + +class v2i64_mul_blo64<dag rB> : + CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>; + +class v2i64_mul_ashlq2<dag rA>: + CodeFrag<(SHLQBYIv4i32 rA, 0x2)>; + +class v2i64_mul_ashlq4<dag rA>: + CodeFrag<(SHLQBYIv4i32 rA, 0x4)>; + +class v2i64_mul_bshlq2<dag rB> : + CodeFrag<(SHLQBYIv4i32 rB, 0x2)>; + +class v2i64_mul_bshlq4<dag rB> : + CodeFrag<(SHLQBYIv4i32 rB, 0x4)>; + +class v2i64_highprod<dag rA, dag rB>: + CodeFrag<(Av4i32 + (Av4i32 + (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment, // a1 x b3 + v2i64_mul_ahi64<rA>.Fragment), + (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment, // a0 x b3 + v2i64_mul_bshlq4<rB>.Fragment)), + (Av4i32 + (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment, + v2i64_mul_ashlq4<rA>.Fragment), + (Av4i32 + (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment, + v2i64_mul_bhi64<rB>.Fragment), + (Av4i32 + (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment, + v2i64_mul_bhi64<rB>.Fragment), + (Av4i32 + (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment, + v2i64_mul_bshlq2<rB>.Fragment), + (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment, + v2i64_mul_bshlq2<rB>.Fragment))))))>; + +class v2i64_mul_a3_b3<dag rA, dag rB>: + CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment, + v2i64_mul_blo64<rB>.Fragment)>; + +class v2i64_mul_a2_b3<dag rA, dag rB>: + CodeFrag<(SELBv4i32 (SHLQBYIv4i32 + (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment, + v2i64_mul_bshlq2<rB>.Fragment), 0x2), + (ILv4i32 0), + (FSMBIv4i32 0xc3c3))>; + +class v2i64_mul_a3_b2<dag rA, dag rB>: + CodeFrag<(SELBv4i32 (SHLQBYIv4i32 + (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment, + v2i64_mul_ashlq2<rA>.Fragment), 0x2), + (ILv4i32 0), + (FSMBIv4i32 0xc3c3))>; + +class v2i64_lowsum<dag rA, dag rB, dag rCGmask>: + v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment, + v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment, + v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>; + +class v2i64_mul<dag rA, dag rB, dag rCGmask>: + v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment, + (SELBv4i32 v2i64_highprod<rA, rB>.Fragment, + (ILv4i32 0), + 
(FSMBIv4i32 0x0f0f)), + rCGmask>; + +def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)), + (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG), + (COPY_TO_REGCLASS R64C:$rB, VECREG), + (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>; + +def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)), + v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB), + (v4i32 VECREG:$rCGmask)>.Fragment>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// f64 comparisons +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +// selb instruction definition for i64. Note that the selection mask is +// a vector, produced by various forms of FSM: +def SELBf64_cond: + SELBInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB, R32C:$rC), + [(set R64FP:$rT, + (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp new file mode 100644 index 000000000000..14021fef05d9 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -0,0 +1,333 @@ +//===-- SPUAsmPrinter.cpp - Print machine instrs to Cell SPU assembly -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to Cell SPU assembly language. This printer +// is the output mechanism used by `llc'. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asmprinter" +#include "SPU.h" +#include "SPUTargetMachine.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + class SPUAsmPrinter : public AsmPrinter { + public: + explicit SPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) : + AsmPrinter(TM, Streamer) {} + + virtual const char *getPassName() const { + return "STI CBEA SPU Assembly Printer"; + } + + /// printInstruction - This method is automatically generated by tablegen + /// from the instruction set description. 
+ void printInstruction(const MachineInstr *MI, raw_ostream &OS); + static const char *getRegisterName(unsigned RegNo); + + + void EmitInstruction(const MachineInstr *MI) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printInstruction(MI, OS); + OutStreamer.EmitRawText(OS.str()); + } + void printOp(const MachineOperand &MO, raw_ostream &OS); + + void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (MO.isReg()) { + O << getRegisterName(MO.getReg()); + } else if (MO.isImm()) { + O << MO.getImm(); + } else { + printOp(MO, O); + } + } + + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O); + + + void + printU7ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + unsigned int value = MI->getOperand(OpNo).getImm(); + assert(value < (1 << 8) && "Invalid u7 argument"); + O << value; + } + + void + printShufAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + char value = MI->getOperand(OpNo).getImm(); + O << (int) value; + O << "("; + printOperand(MI, OpNo+1, O); + O << ")"; + } + + void + printS16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + O << (short) MI->getOperand(OpNo).getImm(); + } + + void + printU16ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + O << (unsigned short)MI->getOperand(OpNo).getImm(); + } + + void + printMemRegReg(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + // When used as the base register, r0 reads constant zero rather than + // the value contained in the register. For this reason, the darwin + // assembler requires that we print r0 as 0 (no r) when used as the base. + const MachineOperand &MO = MI->getOperand(OpNo); + O << getRegisterName(MO.getReg()) << ", "; + printOperand(MI, OpNo+1, O); + } + + void + printU18ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + unsigned int value = MI->getOperand(OpNo).getImm(); + assert(value <= (1 << 19) - 1 && "Invalid u18 argument"); + O << value; + } + + void + printS10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) + >> 16); + assert((value >= -(1 << 9) && value <= (1 << 9) - 1) + && "Invalid s10 argument"); + O << value; + } + + void + printU10ImmOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + short value = (short) (((int) MI->getOperand(OpNo).getImm() << 16) + >> 16); + assert((value <= (1 << 10) - 1) && "Invalid u10 argument"); + O << value; + } + + void + printDFormAddr(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + assert(MI->getOperand(OpNo).isImm() && + "printDFormAddr first operand is not immediate"); + int64_t value = int64_t(MI->getOperand(OpNo).getImm()); + int16_t value16 = int16_t(value); + assert((value16 >= -(1 << (9+4)) && value16 <= (1 << (9+4)) - 1) + && "Invalid dform s10 offset argument"); + O << (value16 & ~0xf) << "("; + printOperand(MI, OpNo+1, O); + O << ")"; + } + + void + printAddr256K(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) + { + /* Note: operand 1 is an offset or symbol name. 
*/ + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + if (MI->getOperand(OpNo+1).isImm()) { + int displ = int(MI->getOperand(OpNo+1).getImm()); + if (displ > 0) + O << "+" << displ; + else if (displ < 0) + O << displ; + } + } + } + + void printCallOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + printOp(MI->getOperand(OpNo), O); + } + + void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + printOp(MI->getOperand(OpNo), O); + } + + void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + // Used to generate a ".-<target>", but it turns out that the assembler + // really wants the target. + // + // N.B.: This operand is used for call targets. Branch hints are another + // animal entirely. + printOp(MI->getOperand(OpNo), O); + } + + void printSymbolHi(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + O << "@h"; + } + } + + void printSymbolLo(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) { + printS16ImmOperand(MI, OpNo, O); + } else { + printOp(MI->getOperand(OpNo), O); + O << "@l"; + } + } + + /// Print local store address + void printSymbolLSA(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + printOp(MI->getOperand(OpNo), O); + } + + void printROTHNeg7Imm(const MachineInstr *MI, unsigned OpNo, + raw_ostream &O) { + if (MI->getOperand(OpNo).isImm()) { + int value = (int) MI->getOperand(OpNo).getImm(); + assert((value >= 0 && value < 16) + && "Invalid negated immediate rotate 7-bit argument"); + O << -value; + } else { + llvm_unreachable("Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + } + } + + void printROTNeg7Imm(const MachineInstr *MI, unsigned OpNo, raw_ostream &O){ + assert(MI->getOperand(OpNo).isImm() && + "Invalid/non-immediate rotate amount in printRotateNeg7Imm"); + int value = (int) MI->getOperand(OpNo).getImm(); + assert((value >= 0 && value <= 32) + && "Invalid negated immediate rotate 7-bit argument"); + O << -value; + } + }; +} // end of anonymous namespace + +// Include the auto-generated portion of the assembly writer +#include "SPUGenAsmWriter.inc" + +void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { + switch (MO.getType()) { + case MachineOperand::MO_Immediate: + report_fatal_error("printOp() does not handle immediate values"); + + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_JumpTableIndex: + O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + case MachineOperand::MO_ConstantPoolIndex: + O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() + << '_' << MO.getIndex(); + return; + case MachineOperand::MO_ExternalSymbol: + // Computing the address of an external symbol, not calling it. 
+ if (TM.getRelocationModel() != Reloc::Static) { + O << "L" << MAI->getGlobalPrefix() << MO.getSymbolName() + << "$non_lazy_ptr"; + return; + } + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + return; + case MachineOperand::MO_GlobalAddress: + // External or weakly linked global variables need non-lazily-resolved + // stubs + if (TM.getRelocationModel() != Reloc::Static) { + const GlobalValue *GV = MO.getGlobal(); + if (((GV->isDeclaration() || GV->hasWeakLinkage() || + GV->hasLinkOnceLinkage() || GV->hasCommonLinkage()))) { + O << *GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + return; + } + } + O << *Mang->getSymbol(MO.getGlobal()); + return; + case MachineOperand::MO_MCSymbol: + O << *(MO.getMCSymbol()); + return; + default: + O << "<unknown operand type: " << MO.getType() << ">"; + return; + } +} + +/// PrintAsmOperand - Print out an operand for an inline asm expression. +/// +bool SPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'L': // Write second word of DImode reference. + // Verify that this operand has two consecutive registers. + if (!MI->getOperand(OpNo).isReg() || + OpNo+1 == MI->getNumOperands() || + !MI->getOperand(OpNo+1).isReg()) + return true; + ++OpNo; // Return the high-part. + break; + } + } + + printOperand(MI, OpNo, O); + return false; +} + +bool SPUAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNo, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + printMemRegReg(MI, OpNo, O); + return false; +} + +// Force static initialization. +extern "C" void LLVMInitializeCellSPUAsmPrinter() { + RegisterAsmPrinter<SPUAsmPrinter> X(TheCellSPUTarget); +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td new file mode 100644 index 000000000000..9f9692bf67fe --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUCallingConv.td @@ -0,0 +1,57 @@ +//===- SPUCallingConv.td - Calling Conventions for CellSPU -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This describes the calling conventions for the STI Cell SPU architecture. +// +//===----------------------------------------------------------------------===// + +/// CCIfSubtarget - Match if the current subtarget has a feature F. 
+class CCIfSubtarget<string F, CCAction A> + : CCIf<!strconcat("State.getTarget().getSubtarget<PPCSubtarget>().", F), A>; + +//===----------------------------------------------------------------------===// +// Return Value Calling Convention +//===----------------------------------------------------------------------===// + +// Return-value convention for Cell SPU: return value to be passed in reg 3-74 +def RetCC_SPU : CallingConv<[ + CCIfType<[i8,i16,i32,i64,i128,f32,f64,v16i8,v8i16,v4i32,v2i64,v4f32,v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>> +]>; + + +//===----------------------------------------------------------------------===// +// CellSPU Argument Calling Conventions +//===----------------------------------------------------------------------===// +def CCC_SPU : CallingConv<[ + CCIfType<[i8, i16, i32, i64, i128, f32, f64, + v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74]>>, + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, + + // Vectors get 16-byte stack slots that are 16-byte aligned. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToStack<16, 16>> +]>; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp new file mode 100644 index 000000000000..fac806e1b0ea --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.cpp @@ -0,0 +1,256 @@ +//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Top-level implementation for the Cell SPU target. 
+// +//===----------------------------------------------------------------------===// + +#include "SPUFrameLowering.h" +#include "SPU.h" +#include "SPUInstrBuilder.h" +#include "SPUInstrInfo.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// SPUFrameLowering: +//===----------------------------------------------------------------------===// + +SPUFrameLowering::SPUFrameLowering(const SPUSubtarget &sti) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0), + Subtarget(sti) { + LR[0].first = SPU::R0; + LR[0].second = 16; +} + + +//-------------------------------------------------------------------------- +// hasFP - Return true if the specified function actually has a dedicated frame +// pointer register. This is true if the function needs a frame pointer and has +// a non-zero stack size. +bool SPUFrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + + return MFI->getStackSize() && + (MF.getTarget().Options.DisableFramePointerElim(MF) || + MFI->hasVarSizedObjects()); +} + + +/// determineFrameLayout - Determine the size of the frame and maximum call +/// frame size. +void SPUFrameLowering::determineFrameLayout(MachineFunction &MF) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + + // Get the number of bytes to allocate from the FrameInfo + unsigned FrameSize = MFI->getStackSize(); + + // Get the alignments provided by the target, and the maximum alignment + // (if any) of the fixed frame objects. + unsigned TargetAlign = getStackAlignment(); + unsigned Align = std::max(TargetAlign, MFI->getMaxAlignment()); + assert(isPowerOf2_32(Align) && "Alignment is not power of 2"); + unsigned AlignMask = Align - 1; + + // Get the maximum call frame size of all the calls. + unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); + + // If we have dynamic alloca then maxCallFrameSize needs to be aligned so + // that allocations will be aligned. + if (MFI->hasVarSizedObjects()) + maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; + + // Update maximum call frame size. + MFI->setMaxCallFrameSize(maxCallFrameSize); + + // Include call frame size in total. + FrameSize += maxCallFrameSize; + + // Make sure the frame is aligned. + FrameSize = (FrameSize + AlignMask) & ~AlignMask; + + // Update frame info. + MFI->setStackSize(FrameSize); +} + +void SPUFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + const SPUInstrInfo &TII = + *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo()); + MachineModuleInfo &MMI = MF.getMMI(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + + // Prepare for debug frame info. + bool hasDebugInfo = MMI.hasDebugInfo(); + MCSymbol *FrameLabel = 0; + + // Move MBBI back to the beginning of the function. + MBBI = MBB.begin(); + + // Work out frame sizes. 
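As a quick orientation before the call that follows: determineFrameLayout (defined above) folds the maximum outgoing call-frame size into the local area and rounds the total up to the stack alignment (16 bytes by default), and the prologue below then uses a short ai-style adjustment when the resulting size fits a signed 10-bit immediate (isInt<10>, i.e. -512..511) and the longer ILr32/Ar32 sequence otherwise. The stand-alone sketch here only restates that arithmetic; the helper name and the sample sizes are invented for illustration and are not taken from this commit.

#include <cassert>
#include <cstdio>

// Round Size up to the next multiple of Align (a power of two), mirroring the
// (FrameSize + AlignMask) & ~AlignMask idiom used in determineFrameLayout.
static unsigned alignTo(unsigned Size, unsigned Align) {
  assert((Align & (Align - 1)) == 0 && "Alignment is not power of 2");
  unsigned AlignMask = Align - 1;
  return (Size + AlignMask) & ~AlignMask;
}

int main() {
  const unsigned StackAlign = 16;   // stack alignment used by this target
  unsigned Locals = 40;             // hypothetical local-variable bytes
  unsigned MaxCallFrame = 20;       // hypothetical outgoing call-frame bytes
  unsigned FrameSize = alignTo(Locals + MaxCallFrame, StackAlign);
  std::printf("frame size = %u\n", FrameSize);   // 60 rounds up to 64
  int Adjust = -(int)FrameSize;     // the prologue subtracts from $sp
  assert(Adjust >= -512 && Adjust <= 511 && "fits the signed 10-bit ai field");
  return 0;
}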
+ determineFrameLayout(MF); + int FrameSize = MFI->getStackSize(); + + assert((FrameSize & 0xf) == 0 + && "SPURegisterInfo::emitPrologue: FrameSize not aligned"); + + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->adjustsStack()) { + FrameSize = -(FrameSize + SPUFrameLowering::minStackSize()); + if (hasDebugInfo) { + // Mark effective beginning of when frame pointer becomes valid. + FrameLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(FrameLabel); + } + + // Adjust stack pointer, spilling $lr -> 16($sp) and $sp -> -FrameSize($sp) + // for the ABI + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16) + .addReg(SPU::R1); + if (isInt<10>(FrameSize)) { + // Spill $sp to adjusted $sp + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize) + .addReg(SPU::R1); + // Adjust $sp by required amout + BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1) + .addImm(FrameSize); + } else if (isInt<16>(FrameSize)) { + // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use + // $r2 to adjust $sp: + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) + .addImm(-16) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) + .addImm(FrameSize); + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1) + .addReg(SPU::R2) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) + .addReg(SPU::R1) + .addReg(SPU::R2); + BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2) + .addReg(SPU::R2) + .addImm(16); + BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) + .addReg(SPU::R2) + .addReg(SPU::R1); + } else { + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); + } + + if (hasDebugInfo) { + std::vector<MachineMove> &Moves = MMI.getFrameMoves(); + + // Show update of SP. + MachineLocation SPDst(MachineLocation::VirtualFP); + MachineLocation SPSrc(MachineLocation::VirtualFP, -FrameSize); + Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc)); + + // Add callee saved registers to move list. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); + unsigned Reg = CSI[I].getReg(); + if (Reg == SPU::R0) continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(FrameLabel, CSDst, CSSrc)); + } + + // Mark effective beginning of when frame pointer is ready. 
+ MCSymbol *ReadyLabel = MMI.getContext().CreateTempSymbol(); + BuildMI(MBB, MBBI, dl, TII.get(SPU::PROLOG_LABEL)).addSym(ReadyLabel); + + MachineLocation FPDst(SPU::R1); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc)); + } + } +} + +void SPUFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + const SPUInstrInfo &TII = + *static_cast<const SPUInstrInfo*>(MF.getTarget().getInstrInfo()); + const MachineFrameInfo *MFI = MF.getFrameInfo(); + int FrameSize = MFI->getStackSize(); + int LinkSlotOffset = SPUFrameLowering::stackSlotSize(); + DebugLoc dl = MBBI->getDebugLoc(); + + assert(MBBI->getOpcode() == SPU::RET && + "Can only insert epilog into returning blocks"); + assert((FrameSize & 0xf) == 0 && "FrameSize not aligned"); + + // the "empty" frame size is 16 - just the register scavenger spill slot + if (FrameSize > 16 || MFI->adjustsStack()) { + FrameSize = FrameSize + SPUFrameLowering::minStackSize(); + if (isInt<10>(FrameSize + LinkSlotOffset)) { + // Reload $lr, adjust $sp by required amount + // Note: We do this to slightly improve dual issue -- not by much, but it + // is an opportunity for dual issue. + BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) + .addImm(FrameSize + LinkSlotOffset) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1) + .addReg(SPU::R1) + .addImm(FrameSize); + } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) { + // Frame size can be loaded into ILr32n, so temporarily spill $r2 and use + // $r2 to adjust $sp: + BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2) + .addImm(16) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2) + .addImm(FrameSize); + BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1) + .addReg(SPU::R1) + .addReg(SPU::R2); + BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0) + .addImm(16) + .addReg(SPU::R1); + BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2). + addReg(SPU::R2) + .addImm(16); + BuildMI(MBB, MBBI, dl, TII.get(SPU::LQXr128), SPU::R2) + .addReg(SPU::R2) + .addReg(SPU::R1); + } else { + report_fatal_error("Unhandled frame size: " + Twine(FrameSize)); + } + } +} + +void SPUFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const{ + // Mark LR and SP unused, since the prolog spills them to stack and + // we don't want anyone else to spill them for us. + // + // Also, unless R2 is really used someday, don't spill it automatically. + MF.getRegInfo().setPhysRegUnused(SPU::R0); + MF.getRegInfo().setPhysRegUnused(SPU::R1); + MF.getRegInfo().setPhysRegUnused(SPU::R2); + + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterClass *RC = &SPU::R32CRegClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h new file mode 100644 index 000000000000..11c52818dd9c --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUFrameLowering.h @@ -0,0 +1,80 @@ +//===-- SPUFrameLowering.h - SPU Frame Lowering stuff ----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains CellSPU frame information that doesn't fit anywhere else +// cleanly... +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_FRAMEINFO_H +#define SPU_FRAMEINFO_H + +#include "SPURegisterInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { + class SPUSubtarget; + + class SPUFrameLowering: public TargetFrameLowering { + const SPUSubtarget &Subtarget; + std::pair<unsigned, int> LR[1]; + + public: + SPUFrameLowering(const SPUSubtarget &sti); + + //! Determine the frame's layour + void determineFrameLayout(MachineFunction &MF) const; + + /// emitProlog/emitEpilog - These methods insert prolog and epilog code into + /// the function. + void emitPrologue(MachineFunction &MF) const; + void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; + + //! Prediate: Target has dedicated frame pointer + bool hasFP(const MachineFunction &MF) const; + + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS = NULL) const; + + //! Return a function's saved spill slots + /*! + For CellSPU, a function's saved spill slots is just the link register. + */ + const std::pair<unsigned, int> * + getCalleeSaveSpillSlots(unsigned &NumEntries) const; + + //! Stack slot size (16 bytes) + static int stackSlotSize() { + return 16; + } + //! Maximum frame offset representable by a signed 10-bit integer + /*! + This is the maximum frame offset that can be expressed as a 10-bit + integer, used in D-form addresses. + */ + static int maxFrameOffset() { + return ((1 << 9) - 1) * stackSlotSize(); + } + //! Minimum frame offset representable by a signed 10-bit integer + static int minFrameOffset() { + return -(1 << 9) * stackSlotSize(); + } + //! Minimum frame size (enough to spill LR + SP) + static int minStackSize() { + return (2 * stackSlotSize()); + } + //! Convert frame index to stack offset + static int FItoStackOffset(int frame_index) { + return frame_index * stackSlotSize(); + } + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp new file mode 100644 index 000000000000..403d7ef1fd9e --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.cpp @@ -0,0 +1,141 @@ +//===-- SPUHazardRecognizers.cpp - Cell Hazard Recognizer Impls -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements hazard recognizers for scheduling on Cell SPU +// processors. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sched" + +#include "SPUHazardRecognizers.h" +#include "SPU.h" +#include "SPUInstrInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Cell SPU hazard recognizer +// +// This is the pipeline hazard recognizer for the Cell SPU processor. It does +// very little right now. 
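The #if 0 block further down records the intended heuristic: quadword loads, stores and RET are treated as odd-pipeline instructions, a hazard is reported when such an instruction would land on an even issue slot, and the recognizer flips its notion of the current slot on every query. The stripped-down model below is written independently of the LLVM scheduler types and is offered only as an illustration; the type and function names are invented, and mustBeOdd stands in for the opcode list in the disabled code.

#include <cstdio>

// Toy model of the even/odd dual-issue alternation sketched in the disabled
// code below; a real hazard recognizer works from SUnit/opcode information.
struct PipelineModel {
  int EvenOdd = 0;                        // 0: next issue slot is even, 1: odd
  bool wouldStall(bool mustBeOdd) {
    bool hazard = mustBeOdd && !EvenOdd;  // odd-only instruction on an even slot
    EvenOdd ^= 1;                         // issue slots alternate every cycle
    return hazard;
  }
};

int main() {
  PipelineModel P;
  // Two back-to-back odd-only instructions: the first hits an even slot and
  // stalls, the second lands on the following odd slot and issues cleanly.
  std::printf("%d %d\n", P.wouldStall(true), P.wouldStall(true));  // prints 1 0
  return 0;
}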
+//===----------------------------------------------------------------------===// + +SPUHazardRecognizer::SPUHazardRecognizer(const TargetInstrInfo &tii) : + TII(tii), + EvenOdd(0) +{ +} + +/// Return the pipeline hazard type encountered or generated by this +/// instruction. Currently returns NoHazard. +/// +/// \return NoHazard +ScheduleHazardRecognizer::HazardType +SPUHazardRecognizer::getHazardType(SUnit *SU, int Stalls) +{ + // Initial thoughts on how to do this, but this code cannot work unless the + // function's prolog and epilog code are also being scheduled so that we can + // accurately determine which pipeline is being scheduled. +#if 0 + assert(Stalls == 0 && "SPU hazards don't yet support scoreboard lookahead"); + + const SDNode *Node = SU->getNode()->getFlaggedMachineNode(); + ScheduleHazardRecognizer::HazardType retval = NoHazard; + bool mustBeOdd = false; + + switch (Node->getOpcode()) { + case SPU::LQDv16i8: + case SPU::LQDv8i16: + case SPU::LQDv4i32: + case SPU::LQDv4f32: + case SPU::LQDv2f64: + case SPU::LQDr128: + case SPU::LQDr64: + case SPU::LQDr32: + case SPU::LQDr16: + case SPU::LQAv16i8: + case SPU::LQAv8i16: + case SPU::LQAv4i32: + case SPU::LQAv4f32: + case SPU::LQAv2f64: + case SPU::LQAr128: + case SPU::LQAr64: + case SPU::LQAr32: + case SPU::LQXv4i32: + case SPU::LQXr128: + case SPU::LQXr64: + case SPU::LQXr32: + case SPU::LQXr16: + case SPU::STQDv16i8: + case SPU::STQDv8i16: + case SPU::STQDv4i32: + case SPU::STQDv4f32: + case SPU::STQDv2f64: + case SPU::STQDr128: + case SPU::STQDr64: + case SPU::STQDr32: + case SPU::STQDr16: + case SPU::STQDr8: + case SPU::STQAv16i8: + case SPU::STQAv8i16: + case SPU::STQAv4i32: + case SPU::STQAv4f32: + case SPU::STQAv2f64: + case SPU::STQAr128: + case SPU::STQAr64: + case SPU::STQAr32: + case SPU::STQAr16: + case SPU::STQAr8: + case SPU::STQXv16i8: + case SPU::STQXv8i16: + case SPU::STQXv4i32: + case SPU::STQXv4f32: + case SPU::STQXv2f64: + case SPU::STQXr128: + case SPU::STQXr64: + case SPU::STQXr32: + case SPU::STQXr16: + case SPU::STQXr8: + case SPU::RET: + mustBeOdd = true; + break; + default: + // Assume that this instruction can be on the even pipe + break; + } + + if (mustBeOdd && !EvenOdd) + retval = Hazard; + + DEBUG(errs() << "SPUHazardRecognizer EvenOdd " << EvenOdd << " Hazard " + << retval << "\n"); + EvenOdd ^= 1; + return retval; +#else + return NoHazard; +#endif +} + +void SPUHazardRecognizer::EmitInstruction(SUnit *SU) +{ +} + +void SPUHazardRecognizer::AdvanceCycle() +{ + DEBUG(errs() << "SPUHazardRecognizer::AdvanceCycle\n"); +} + +void SPUHazardRecognizer::EmitNoop() +{ + AdvanceCycle(); +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h new file mode 100644 index 000000000000..675632cc7f13 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUHazardRecognizers.h @@ -0,0 +1,41 @@ +//===-- SPUHazardRecognizers.h - Cell SPU Hazard Recognizer -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling on the Cell SPU +// processor. 
+// +//===----------------------------------------------------------------------===// + +#ifndef SPUHAZRECS_H +#define SPUHAZRECS_H + +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" + +namespace llvm { + +class TargetInstrInfo; + +/// SPUHazardRecognizer +class SPUHazardRecognizer : public ScheduleHazardRecognizer +{ +private: + const TargetInstrInfo &TII; + int EvenOdd; + +public: + SPUHazardRecognizer(const TargetInstrInfo &TII); + virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void EmitNoop(); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp new file mode 100644 index 000000000000..c27caeae7d45 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -0,0 +1,1193 @@ +//===-- SPUISelDAGToDAG.cpp - CellSPU pattern matching inst selector ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pattern matching instruction selector for the Cell SPU, +// converting from a legalized dag to a SPU-target dag. +// +//===----------------------------------------------------------------------===// + +#include "SPU.h" +#include "SPUTargetMachine.h" +#include "SPUHazardRecognizers.h" +#include "SPUFrameLowering.h" +#include "SPUTargetMachine.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Constants.h" +#include "llvm/GlobalValue.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + //! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates + bool + isI32IntS10Immediate(ConstantSDNode *CN) + { + return isInt<10>(CN->getSExtValue()); + } + + //! ConstantSDNode predicate for i32 unsigned 10-bit immediate values + bool + isI32IntU10Immediate(ConstantSDNode *CN) + { + return isUInt<10>(CN->getSExtValue()); + } + + //! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values + bool + isI16IntS10Immediate(ConstantSDNode *CN) + { + return isInt<10>(CN->getSExtValue()); + } + + //! ConstantSDNode predicate for i16 unsigned 10-bit immediate values + bool + isI16IntU10Immediate(ConstantSDNode *CN) + { + return isUInt<10>((short) CN->getZExtValue()); + } + + //! ConstantSDNode predicate for signed 16-bit values + /*! + \arg CN The constant SelectionDAG node holding the value + \arg Imm The returned 16-bit value, if returning true + + This predicate tests the value in \a CN to see whether it can be + represented as a 16-bit, sign-extended quantity. Returns true if + this is the case. 
+ */ + bool + isIntS16Immediate(ConstantSDNode *CN, short &Imm) + { + EVT vt = CN->getValueType(0); + Imm = (short) CN->getZExtValue(); + if (vt.getSimpleVT() >= MVT::i1 && vt.getSimpleVT() <= MVT::i16) { + return true; + } else if (vt == MVT::i32) { + int32_t i_val = (int32_t) CN->getZExtValue(); + short s_val = (short) i_val; + return i_val == s_val; + } else { + int64_t i_val = (int64_t) CN->getZExtValue(); + short s_val = (short) i_val; + return i_val == s_val; + } + } + + //! ConstantFPSDNode predicate for representing floats as 16-bit sign ext. + static bool + isFPS16Immediate(ConstantFPSDNode *FPN, short &Imm) + { + EVT vt = FPN->getValueType(0); + if (vt == MVT::f32) { + int val = FloatToBits(FPN->getValueAPF().convertToFloat()); + int sval = (int) ((val << 16) >> 16); + Imm = (short) val; + return val == sval; + } + + return false; + } + + //! Generate the carry-generate shuffle mask. + SDValue getCarryGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector<SDValue, 16 > ShufBytes; + + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. + ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); + } + + //! Generate the borrow-generate shuffle mask + SDValue getBorrowGenerateShufMask(SelectionDAG &DAG, DebugLoc dl) { + SmallVector<SDValue, 16 > ShufBytes; + + // Create the shuffle mask for "rotating" the borrow up one register slot + // once the borrow is generated. + ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); + ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size()); + } + + //===------------------------------------------------------------------===// + /// SPUDAGToDAGISel - Cell SPU-specific code to select SPU machine + /// instructions for SelectionDAG operations. + /// + class SPUDAGToDAGISel : + public SelectionDAGISel + { + const SPUTargetMachine &TM; + const SPUTargetLowering &SPUtli; + unsigned GlobalBaseReg; + + public: + explicit SPUDAGToDAGISel(SPUTargetMachine &tm) : + SelectionDAGISel(tm), + TM(tm), + SPUtli(*tm.getTargetLowering()) + { } + + virtual bool runOnMachineFunction(MachineFunction &MF) { + // Make sure we re-emit a set of the global base reg if necessary + GlobalBaseReg = 0; + SelectionDAGISel::runOnMachineFunction(MF); + return true; + } + + /// getI32Imm - Return a target constant with the specified value, of type + /// i32. + inline SDValue getI32Imm(uint32_t Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + /// getSmallIPtrImm - Return a target constant of pointer type. 
+ inline SDValue getSmallIPtrImm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, SPUtli.getPointerTy()); + } + + SDNode *emitBuildVector(SDNode *bvNode) { + EVT vecVT = bvNode->getValueType(0); + DebugLoc dl = bvNode->getDebugLoc(); + + // Check to see if this vector can be represented as a CellSPU immediate + // constant by invoking all of the instruction selection predicates: + if (((vecVT == MVT::v8i16) && + (SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i16).getNode() != 0)) || + ((vecVT == MVT::v4i32) && + ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i32).getNode() != 0) || + (SPU::get_v4i32_imm(bvNode, *CurDAG).getNode() != 0))) || + ((vecVT == MVT::v2i64) && + ((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || + (SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) || + (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) { + HandleSDNode Dummy(SDValue(bvNode, 0)); + if (SDNode *N = Select(bvNode)) + return N; + return Dummy.getValue().getNode(); + } + + // No, need to emit a constant pool spill: + std::vector<Constant*> CV; + + for (size_t i = 0; i < bvNode->getNumOperands(); ++i) { + ConstantSDNode *V = cast<ConstantSDNode > (bvNode->getOperand(i)); + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + } + + const Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = CurDAG->getConstantPool(CP, SPUtli.getPointerTy()); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + SDValue CGPoolOffset = + SPU::LowerConstantPool(CPIdx, *CurDAG, TM); + + HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, + CurDAG->getEntryNode(), CGPoolOffset, + MachinePointerInfo::getConstantPool(), + false, false, false, Alignment)); + CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); + } + + /// Select - Convert the specified operand from a target-independent to a + /// target-specific node if it hasn't already been changed. + SDNode *Select(SDNode *N); + + //! Emit the instruction sequence for i64 shl + SDNode *SelectSHLi64(SDNode *N, EVT OpVT); + + //! Emit the instruction sequence for i64 srl + SDNode *SelectSRLi64(SDNode *N, EVT OpVT); + + //! Emit the instruction sequence for i64 sra + SDNode *SelectSRAi64(SDNode *N, EVT OpVT); + + //! Emit the necessary sequence for loading i64 constants: + SDNode *SelectI64Constant(SDNode *N, EVT OpVT, DebugLoc dl); + + //! Alternate instruction emit sequence for loading i64 constants + SDNode *SelectI64Constant(uint64_t i64const, EVT OpVT, DebugLoc dl); + + //! Returns true if the address N is an A-form (local store) address + bool SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, + SDValue &Index); + + //! D-form address predicate + bool SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, + SDValue &Index); + + /// Alternate D-form address using i7 offset predicate + bool SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, + SDValue &Base); + + /// D-form address selection workhorse + bool DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Disp, + SDValue &Base, int minOffset, int maxOffset); + + //! Address predicate if N can be expressed as an indexed [r+r] operation. 
+ bool SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, + SDValue &Index); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector<SDValue> &OutOps) { + SDValue Op0, Op1; + switch (ConstraintCode) { + default: return true; + case 'm': // memory + if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) + && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) + SelectXFormAddr(Op.getNode(), Op, Op0, Op1); + break; + case 'o': // offsetable + if (!SelectDFormAddr(Op.getNode(), Op, Op0, Op1) + && !SelectAFormAddr(Op.getNode(), Op, Op0, Op1)) { + Op0 = Op; + Op1 = getSmallIPtrImm(0); + } + break; + case 'v': // not offsetable +#if 1 + llvm_unreachable("InlineAsmMemoryOperand 'v' constraint not handled."); +#else + SelectAddrIdxOnly(Op, Op, Op0, Op1); + break; +#endif + } + + OutOps.push_back(Op0); + OutOps.push_back(Op1); + return false; + } + + virtual const char *getPassName() const { + return "Cell SPU DAG->DAG Pattern Instruction Selection"; + } + + private: + SDValue getRC( MVT ); + + // Include the pieces autogenerated from the target description. +#include "SPUGenDAGISel.inc" + }; +} + +/*! + \arg Op The ISD instruction operand + \arg N The address to be tested + \arg Base The base address + \arg Index The base address index + */ +bool +SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, + SDValue &Index) { + // These match the addr256k operand type: + EVT OffsVT = MVT::i16; + SDValue Zero = CurDAG->getTargetConstant(0, OffsVT); + int64_t val; + + switch (N.getOpcode()) { + case ISD::Constant: + val = dyn_cast<ConstantSDNode>(N.getNode())->getSExtValue(); + Base = CurDAG->getTargetConstant( val , MVT::i32); + Index = Zero; + return true; + case ISD::ConstantPool: + case ISD::GlobalAddress: + report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered."); + /*NOTREACHED*/ + + case ISD::TargetConstant: + case ISD::TargetGlobalAddress: + case ISD::TargetJumpTable: + report_fatal_error("SPUSelectAFormAddr: Target Constant/Pool/Global " + "not wrapped as A-form address."); + /*NOTREACHED*/ + + case SPUISD::AFormAddr: + // Just load from memory if there's only a single use of the location, + // otherwise, this will get handled below with D-form offset addresses + if (N.hasOneUse()) { + SDValue Op0 = N.getOperand(0); + switch (Op0.getOpcode()) { + case ISD::TargetConstantPool: + case ISD::TargetJumpTable: + Base = Op0; + Index = Zero; + return true; + + case ISD::TargetGlobalAddress: { + GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op0); + const GlobalValue *GV = GSDN->getGlobal(); + if (GV->getAlignment() == 16) { + Base = Op0; + Index = Zero; + return true; + } + break; + } + } + } + break; + } + return false; +} + +bool +SPUDAGToDAGISel::SelectDForm2Addr(SDNode *Op, SDValue N, SDValue &Disp, + SDValue &Base) { + const int minDForm2Offset = -(1 << 7); + const int maxDForm2Offset = (1 << 7) - 1; + return DFormAddressPredicate(Op, N, Disp, Base, minDForm2Offset, + maxDForm2Offset); +} + +/*! + \arg Op The ISD instruction (ignored) + \arg N The address to be tested + \arg Base Base address register/pointer + \arg Index Base address index + + Examine the input address by a base register plus a signed 10-bit + displacement, [r+I10] (D-form address). + + \return true if \a N is a D-form address with \a Base and \a Index set + to non-empty SDValue instances. 
+*/ +bool +SPUDAGToDAGISel::SelectDFormAddr(SDNode *Op, SDValue N, SDValue &Base, + SDValue &Index) { + return DFormAddressPredicate(Op, N, Base, Index, + SPUFrameLowering::minFrameOffset(), + SPUFrameLowering::maxFrameOffset()); +} + +bool +SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, + SDValue &Index, int minOffset, + int maxOffset) { + unsigned Opc = N.getOpcode(); + EVT PtrTy = SPUtli.getPointerTy(); + + if (Opc == ISD::FrameIndex) { + // Stack frame index must be less than 512 (divided by 16): + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(N); + int FI = int(FIN->getIndex()); + DEBUG(errs() << "SelectDFormAddr: ISD::FrameIndex = " + << FI << "\n"); + if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI, PtrTy); + return true; + } + } else if (Opc == ISD::ADD) { + // Generated by getelementptr + const SDValue Op0 = N.getOperand(0); + const SDValue Op1 = N.getOperand(1); + + if ((Op0.getOpcode() == SPUISD::Hi && Op1.getOpcode() == SPUISD::Lo) + || (Op1.getOpcode() == SPUISD::Hi && Op0.getOpcode() == SPUISD::Lo)) { + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = N; + return true; + } else if (Op1.getOpcode() == ISD::Constant + || Op1.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = cast<ConstantSDNode>(Op1); + int32_t offset = int32_t(CN->getSExtValue()); + + if (Op0.getOpcode() == ISD::FrameIndex) { + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op0); + int FI = int(FIN->getIndex()); + DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI << "\n"); + + if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI, PtrTy); + return true; + } + } else if (offset > minOffset && offset < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = Op0; + return true; + } + } else if (Op0.getOpcode() == ISD::Constant + || Op0.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *CN = cast<ConstantSDNode>(Op0); + int32_t offset = int32_t(CN->getSExtValue()); + + if (Op1.getOpcode() == ISD::FrameIndex) { + FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op1); + int FI = int(FIN->getIndex()); + DEBUG(errs() << "SelectDFormAddr: ISD::ADD offset = " << offset + << " frame index = " << FI << "\n"); + + if (SPUFrameLowering::FItoStackOffset(FI) < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = CurDAG->getTargetFrameIndex(FI, PtrTy); + return true; + } + } else if (offset > minOffset && offset < maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = Op1; + return true; + } + } + } else if (Opc == SPUISD::IndirectAddr) { + // Indirect with constant offset -> D-Form address + const SDValue Op0 = N.getOperand(0); + const SDValue Op1 = N.getOperand(1); + + if (Op0.getOpcode() == SPUISD::Hi + && Op1.getOpcode() == SPUISD::Lo) { + // (SPUindirect (SPUhi <arg>, 0), (SPUlo <arg>, 0)) + Base = CurDAG->getTargetConstant(0, PtrTy); + Index = N; + return true; + } else if (isa<ConstantSDNode>(Op0) || isa<ConstantSDNode>(Op1)) { + int32_t offset = 0; + SDValue idxOp; + + if (isa<ConstantSDNode>(Op1)) { + ConstantSDNode *CN = cast<ConstantSDNode>(Op1); + offset = int32_t(CN->getSExtValue()); + idxOp = Op0; + } else if (isa<ConstantSDNode>(Op0)) { + ConstantSDNode *CN = cast<ConstantSDNode>(Op0); + offset = int32_t(CN->getSExtValue()); + idxOp = Op1; + } + + if (offset >= minOffset && 
offset <= maxOffset) { + Base = CurDAG->getTargetConstant(offset, PtrTy); + Index = idxOp; + return true; + } + } + } else if (Opc == SPUISD::AFormAddr) { + Base = CurDAG->getTargetConstant(0, N.getValueType()); + Index = N; + return true; + } else if (Opc == SPUISD::LDRESULT) { + Base = CurDAG->getTargetConstant(0, N.getValueType()); + Index = N; + return true; + } else if (Opc == ISD::Register + ||Opc == ISD::CopyFromReg + ||Opc == ISD::UNDEF + ||Opc == ISD::Constant) { + unsigned OpOpc = Op->getOpcode(); + + if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { + // Direct load/store without getelementptr + SDValue Offs; + + Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2)); + + if (Offs.getOpcode() == ISD::Constant || Offs.getOpcode() == ISD::UNDEF) { + if (Offs.getOpcode() == ISD::UNDEF) + Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); + + Base = Offs; + Index = N; + return true; + } + } else { + /* If otherwise unadorned, default to D-form address with 0 offset: */ + if (Opc == ISD::CopyFromReg) { + Index = N.getOperand(1); + } else { + Index = N; + } + + Base = CurDAG->getTargetConstant(0, Index.getValueType()); + return true; + } + } + + return false; +} + +/*! + \arg Op The ISD instruction operand + \arg N The address operand + \arg Base The base pointer operand + \arg Index The offset/index operand + + If the address \a N can be expressed as an A-form or D-form address, returns + false. Otherwise, creates two operands, Base and Index that will become the + (r)(r) X-form address. +*/ +bool +SPUDAGToDAGISel::SelectXFormAddr(SDNode *Op, SDValue N, SDValue &Base, + SDValue &Index) { + if (!SelectAFormAddr(Op, N, Base, Index) + && !SelectDFormAddr(Op, N, Base, Index)) { + // If the address is neither A-form or D-form, punt and use an X-form + // address: + Base = N.getOperand(1); + Index = N.getOperand(0); + return true; + } + + return false; +} + +/*! + Utility function to use with COPY_TO_REGCLASS instructions. Returns a SDValue + to be used as the last parameter of a +CurDAG->getMachineNode(COPY_TO_REGCLASS,..., ) function call + \arg VT the value type for which we want a register class +*/ +SDValue SPUDAGToDAGISel::getRC( MVT VT ) { + switch( VT.SimpleTy ) { + case MVT::i8: + return CurDAG->getTargetConstant(SPU::R8CRegClass.getID(), MVT::i32); + case MVT::i16: + return CurDAG->getTargetConstant(SPU::R16CRegClass.getID(), MVT::i32); + case MVT::i32: + return CurDAG->getTargetConstant(SPU::R32CRegClass.getID(), MVT::i32); + case MVT::f32: + return CurDAG->getTargetConstant(SPU::R32FPRegClass.getID(), MVT::i32); + case MVT::i64: + return CurDAG->getTargetConstant(SPU::R64CRegClass.getID(), MVT::i32); + case MVT::i128: + return CurDAG->getTargetConstant(SPU::GPRCRegClass.getID(), MVT::i32); + case MVT::v16i8: + case MVT::v8i16: + case MVT::v4i32: + case MVT::v4f32: + case MVT::v2i64: + case MVT::v2f64: + return CurDAG->getTargetConstant(SPU::VECREGRegClass.getID(), MVT::i32); + default: + assert( false && "add a new case here" ); + return SDValue(); + } +} + +//! Convert the operand from a target-independent to a target-specific node +/*! + */ +SDNode * +SPUDAGToDAGISel::Select(SDNode *N) { + unsigned Opc = N->getOpcode(); + int n_ops = -1; + unsigned NewOpc = 0; + EVT OpVT = N->getValueType(0); + SDValue Ops[8]; + DebugLoc dl = N->getDebugLoc(); + + if (N->isMachineOpcode()) + return NULL; // Already selected. 
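For reference while reading the address predicates above: the D-form window enforced by DFormAddressPredicate comes straight from the SPUFrameLowering.h helpers, and SelectDForm2Addr narrows it to a signed 7-bit byte range. The block below only restates arithmetic already present in those sources; the constant names are local to this illustration.

// Displacement windows used by DFormAddressPredicate and SelectDForm2Addr.
constexpr int StackSlotSize   = 16;                             // bytes per slot
constexpr int MaxFrameOffset  = ((1 << 9) - 1) * StackSlotSize; //  8176
constexpr int MinFrameOffset  = -(1 << 9) * StackSlotSize;      // -8192
constexpr int MaxDForm2Offset = (1 << 7) - 1;                   //    127
constexpr int MinDForm2Offset = -(1 << 7);                      //   -128
static_assert(MaxFrameOffset == 8176 && MinFrameOffset == -8192,
              "10-bit D-form range, scaled by the 16-byte slot size");
static_assert(MaxDForm2Offset == 127 && MinDForm2Offset == -128,
              "7-bit byte range used by the D-form2 variant");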
+ + if (Opc == ISD::FrameIndex) { + int FI = cast<FrameIndexSDNode>(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0)); + SDValue Imm0 = CurDAG->getTargetConstant(0, N->getValueType(0)); + + if (FI < 128) { + NewOpc = SPU::AIr32; + Ops[0] = TFI; + Ops[1] = Imm0; + n_ops = 2; + } else { + NewOpc = SPU::Ar32; + Ops[0] = CurDAG->getRegister(SPU::R1, N->getValueType(0)); + Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILAr32, dl, + N->getValueType(0), TFI), + 0); + n_ops = 2; + } + } else if (Opc == ISD::Constant && OpVT == MVT::i64) { + // Catch the i64 constants that end up here. Note: The backend doesn't + // attempt to legalize the constant (it's useless because DAGCombiner + // will insert 64-bit constants and we can't stop it). + return SelectI64Constant(N, OpVT, N->getDebugLoc()); + } else if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) + && OpVT == MVT::i64) { + SDValue Op0 = N->getOperand(0); + EVT Op0VT = Op0.getValueType(); + EVT Op0VecVT = EVT::getVectorVT(*CurDAG->getContext(), + Op0VT, (128 / Op0VT.getSizeInBits())); + EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); + SDValue shufMask; + + switch (Op0VT.getSimpleVT().SimpleTy) { + default: + report_fatal_error("CellSPU Select: Unhandled zero/any extend EVT"); + /*NOTREACHED*/ + case MVT::i32: + shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x00010203, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x08090a0b, MVT::i32)); + break; + + case MVT::i16: + shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80800203, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80800a0b, MVT::i32)); + break; + + case MVT::i8: + shufMask = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x80808003, MVT::i32), + CurDAG->getConstant(0x80808080, MVT::i32), + CurDAG->getConstant(0x8080800b, MVT::i32)); + break; + } + + SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode()); + + HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl, + Op0VecVT, Op0)); + + SDValue PromScalar; + if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode())) + PromScalar = SDValue(N, 0); + else + PromScalar = PromoteScalar.getValue(); + + SDValue zextShuffle = + CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, + PromScalar, PromScalar, + SDValue(shufMaskLoad, 0)); + + HandleSDNode Dummy2(zextShuffle); + if (SDNode *N = SelectCode(Dummy2.getValue().getNode())) + zextShuffle = SDValue(N, 0); + else + zextShuffle = Dummy2.getValue(); + HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT, + zextShuffle)); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + SelectCode(Dummy.getValue().getNode()); + return Dummy.getValue().getNode(); + } else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); + + HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); + } else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + 
SDNode *CGLoad = + emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode()); + + HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); + } else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) { + SDNode *CGLoad = + emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode()); + + HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT, + N->getOperand(0), N->getOperand(1), + SDValue(CGLoad, 0))); + CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode()); + if (SDNode *N = SelectCode(Dummy.getValue().getNode())) + return N; + return Dummy.getValue().getNode(); + } else if (Opc == ISD::TRUNCATE) { + SDValue Op0 = N->getOperand(0); + if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL) + && OpVT == MVT::i32 + && Op0.getValueType() == MVT::i64) { + // Catch (truncate:i32 ([sra|srl]:i64 arg, c), where c >= 32 + // + // Take advantage of the fact that the upper 32 bits are in the + // i32 preferred slot and avoid shuffle gymnastics: + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); + if (CN != 0) { + unsigned shift_amt = unsigned(CN->getZExtValue()); + + if (shift_amt >= 32) { + SDNode *hi32 = + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + Op0.getOperand(0), getRC(MVT::i32)); + + shift_amt -= 32; + if (shift_amt > 0) { + // Take care of the additional shift, if present: + SDValue shift = CurDAG->getTargetConstant(shift_amt, MVT::i32); + unsigned Opc = SPU::ROTMAIr32_i32; + + if (Op0.getOpcode() == ISD::SRL) + Opc = SPU::ROTMr32; + + hi32 = CurDAG->getMachineNode(Opc, dl, OpVT, SDValue(hi32, 0), + shift); + } + + return hi32; + } + } + } + } else if (Opc == ISD::SHL) { + if (OpVT == MVT::i64) + return SelectSHLi64(N, OpVT); + } else if (Opc == ISD::SRL) { + if (OpVT == MVT::i64) + return SelectSRLi64(N, OpVT); + } else if (Opc == ISD::SRA) { + if (OpVT == MVT::i64) + return SelectSRAi64(N, OpVT); + } else if (Opc == ISD::FNEG + && (OpVT == MVT::f64 || OpVT == MVT::v2f64)) { + DebugLoc dl = N->getDebugLoc(); + // Check if the pattern is a special form of DFNMS: + // (fneg (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC)) + SDValue Op0 = N->getOperand(0); + if (Op0.getOpcode() == ISD::FSUB) { + SDValue Op00 = Op0.getOperand(0); + if (Op00.getOpcode() == ISD::FMUL) { + unsigned Opc = SPU::DFNMSf64; + if (OpVT == MVT::v2f64) + Opc = SPU::DFNMSv2f64; + + return CurDAG->getMachineNode(Opc, dl, OpVT, + Op00.getOperand(0), + Op00.getOperand(1), + Op0.getOperand(1)); + } + } + + SDValue negConst = CurDAG->getConstant(0x8000000000000000ULL, MVT::i64); + SDNode *signMask = 0; + unsigned Opc = SPU::XORfneg64; + + if (OpVT == MVT::f64) { + signMask = SelectI64Constant(negConst.getNode(), MVT::i64, dl); + } else if (OpVT == MVT::v2f64) { + Opc = SPU::XORfnegvec; + signMask = emitBuildVector(CurDAG->getNode(ISD::BUILD_VECTOR, dl, + MVT::v2i64, + negConst, negConst).getNode()); + } + + return CurDAG->getMachineNode(Opc, dl, OpVT, + N->getOperand(0), SDValue(signMask, 0)); + } else if (Opc == ISD::FABS) { + if (OpVT == MVT::f64) { + SDNode *signMask = SelectI64Constant(0x7fffffffffffffffULL, MVT::i64, dl); + return CurDAG->getMachineNode(SPU::ANDfabs64, dl, OpVT, + N->getOperand(0), SDValue(signMask, 0)); + } else if (OpVT == MVT::v2f64) { + SDValue absConst = 
CurDAG->getConstant(0x7fffffffffffffffULL, MVT::i64); + SDValue absVec = CurDAG->getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, + absConst, absConst); + SDNode *signMask = emitBuildVector(absVec.getNode()); + return CurDAG->getMachineNode(SPU::ANDfabsvec, dl, OpVT, + N->getOperand(0), SDValue(signMask, 0)); + } + } else if (Opc == SPUISD::LDRESULT) { + // Custom select instructions for LDRESULT + EVT VT = N->getValueType(0); + SDValue Arg = N->getOperand(0); + SDValue Chain = N->getOperand(1); + SDNode *Result; + + Result = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VT, + MVT::Other, Arg, + getRC( VT.getSimpleVT()), Chain); + return Result; + + } else if (Opc == SPUISD::IndirectAddr) { + // Look at the operands: SelectCode() will catch the cases that aren't + // specifically handled here. + // + // SPUInstrInfo catches the following patterns: + // (SPUindirect (SPUhi ...), (SPUlo ...)) + // (SPUindirect $sp, imm) + EVT VT = N->getValueType(0); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + RegisterSDNode *RN; + + if ((Op0.getOpcode() != SPUISD::Hi && Op1.getOpcode() != SPUISD::Lo) + || (Op0.getOpcode() == ISD::Register + && ((RN = dyn_cast<RegisterSDNode>(Op0.getNode())) != 0 + && RN->getReg() != SPU::R1))) { + NewOpc = SPU::Ar32; + Ops[1] = Op1; + if (Op1.getOpcode() == ISD::Constant) { + ConstantSDNode *CN = cast<ConstantSDNode>(Op1); + Op1 = CurDAG->getTargetConstant(CN->getSExtValue(), VT); + if (isInt<10>(CN->getSExtValue())) { + NewOpc = SPU::AIr32; + Ops[1] = Op1; + } else { + Ops[1] = SDValue(CurDAG->getMachineNode(SPU::ILr32, dl, + N->getValueType(0), + Op1), + 0); + } + } + Ops[0] = Op0; + n_ops = 2; + } + } + + if (n_ops > 0) { + if (N->hasOneUse()) + return CurDAG->SelectNodeTo(N, NewOpc, OpVT, Ops, n_ops); + else + return CurDAG->getMachineNode(NewOpc, dl, OpVT, Ops, n_ops); + } else + return SelectCode(N); +} + +/*! + * Emit the instruction sequence for i64 left shifts. The basic algorithm + * is to fill the bottom two word slots with zeros so that zeros are shifted + * in as the entire quadword is shifted left. + * + * \note This code could also be used to implement v2i64 shl. + * + * @param Op The shl operand + * @param OpVT Op's machine value value type (doesn't need to be passed, but + * makes life easier.) 
+ * @return The SDNode with the entire instruction sequence + */ +SDNode * +SPUDAGToDAGISel::SelectSHLi64(SDNode *N, EVT OpVT) { + SDValue Op0 = N->getOperand(0); + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); + SDValue ShiftAmt = N->getOperand(1); + EVT ShiftAmtVT = ShiftAmt.getValueType(); + SDNode *VecOp0, *SelMask, *ZeroFill, *Shift = 0; + SDValue SelMaskVal; + DebugLoc dl = N->getDebugLoc(); + + VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, + Op0, getRC(MVT::v2i64) ); + SelMaskVal = CurDAG->getTargetConstant(0xff00ULL, MVT::i16); + SelMask = CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, SelMaskVal); + ZeroFill = CurDAG->getMachineNode(SPU::ILv2i64, dl, VecVT, + CurDAG->getTargetConstant(0, OpVT)); + VecOp0 = CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, + SDValue(ZeroFill, 0), + SDValue(VecOp0, 0), + SDValue(SelMask, 0)); + + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { + unsigned bytes = unsigned(CN->getZExtValue()) >> 3; + unsigned bits = unsigned(CN->getZExtValue()) & 7; + + if (bytes > 0) { + Shift = + CurDAG->getMachineNode(SPU::SHLQBYIv2i64, dl, VecVT, + SDValue(VecOp0, 0), + CurDAG->getTargetConstant(bytes, ShiftAmtVT)); + } + + if (bits > 0) { + Shift = + CurDAG->getMachineNode(SPU::SHLQBIIv2i64, dl, VecVT, + SDValue((Shift != 0 ? Shift : VecOp0), 0), + CurDAG->getTargetConstant(bits, ShiftAmtVT)); + } + } else { + SDNode *Bytes = + CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(3, ShiftAmtVT)); + SDNode *Bits = + CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(7, ShiftAmtVT)); + Shift = + CurDAG->getMachineNode(SPU::SHLQBYv2i64, dl, VecVT, + SDValue(VecOp0, 0), SDValue(Bytes, 0)); + Shift = + CurDAG->getMachineNode(SPU::SHLQBIv2i64, dl, VecVT, + SDValue(Shift, 0), SDValue(Bits, 0)); + } + + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); +} + +/*! + * Emit the instruction sequence for i64 logical right shifts. + * + * @param Op The shl operand + * @param OpVT Op's machine value value type (doesn't need to be passed, but + * makes life easier.) + * @return The SDNode with the entire instruction sequence + */ +SDNode * +SPUDAGToDAGISel::SelectSRLi64(SDNode *N, EVT OpVT) { + SDValue Op0 = N->getOperand(0); + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); + SDValue ShiftAmt = N->getOperand(1); + EVT ShiftAmtVT = ShiftAmt.getValueType(); + SDNode *VecOp0, *Shift = 0; + DebugLoc dl = N->getDebugLoc(); + + VecOp0 = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, VecVT, + Op0, getRC(MVT::v2i64) ); + + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { + unsigned bytes = unsigned(CN->getZExtValue()) >> 3; + unsigned bits = unsigned(CN->getZExtValue()) & 7; + + if (bytes > 0) { + Shift = + CurDAG->getMachineNode(SPU::ROTQMBYIv2i64, dl, VecVT, + SDValue(VecOp0, 0), + CurDAG->getTargetConstant(bytes, ShiftAmtVT)); + } + + if (bits > 0) { + Shift = + CurDAG->getMachineNode(SPU::ROTQMBIIv2i64, dl, VecVT, + SDValue((Shift != 0 ? 
Shift : VecOp0), 0), + CurDAG->getTargetConstant(bits, ShiftAmtVT)); + } + } else { + SDNode *Bytes = + CurDAG->getMachineNode(SPU::ROTMIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(3, ShiftAmtVT)); + SDNode *Bits = + CurDAG->getMachineNode(SPU::ANDIr32, dl, ShiftAmtVT, + ShiftAmt, + CurDAG->getTargetConstant(7, ShiftAmtVT)); + + // Ensure that the shift amounts are negated! + Bytes = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, + SDValue(Bytes, 0), + CurDAG->getTargetConstant(0, ShiftAmtVT)); + + Bits = CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, + SDValue(Bits, 0), + CurDAG->getTargetConstant(0, ShiftAmtVT)); + + Shift = + CurDAG->getMachineNode(SPU::ROTQMBYv2i64, dl, VecVT, + SDValue(VecOp0, 0), SDValue(Bytes, 0)); + Shift = + CurDAG->getMachineNode(SPU::ROTQMBIv2i64, dl, VecVT, + SDValue(Shift, 0), SDValue(Bits, 0)); + } + + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); +} + +/*! + * Emit the instruction sequence for i64 arithmetic right shifts. + * + * @param Op The shl operand + * @param OpVT Op's machine value value type (doesn't need to be passed, but + * makes life easier.) + * @return The SDNode with the entire instruction sequence + */ +SDNode * +SPUDAGToDAGISel::SelectSRAi64(SDNode *N, EVT OpVT) { + // Promote Op0 to vector + EVT VecVT = EVT::getVectorVT(*CurDAG->getContext(), + OpVT, (128 / OpVT.getSizeInBits())); + SDValue ShiftAmt = N->getOperand(1); + EVT ShiftAmtVT = ShiftAmt.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + SDNode *VecOp0 = + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + VecVT, N->getOperand(0), getRC(MVT::v2i64)); + + SDValue SignRotAmt = CurDAG->getTargetConstant(31, ShiftAmtVT); + SDNode *SignRot = + CurDAG->getMachineNode(SPU::ROTMAIv2i64_i32, dl, MVT::v2i64, + SDValue(VecOp0, 0), SignRotAmt); + SDNode *UpperHalfSign = + CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + MVT::i32, SDValue(SignRot, 0), getRC(MVT::i32)); + + SDNode *UpperHalfSignMask = + CurDAG->getMachineNode(SPU::FSM64r32, dl, VecVT, SDValue(UpperHalfSign, 0)); + SDNode *UpperLowerMask = + CurDAG->getMachineNode(SPU::FSMBIv2i64, dl, VecVT, + CurDAG->getTargetConstant(0xff00ULL, MVT::i16)); + SDNode *UpperLowerSelect = + CurDAG->getMachineNode(SPU::SELBv2i64, dl, VecVT, + SDValue(UpperHalfSignMask, 0), + SDValue(VecOp0, 0), + SDValue(UpperLowerMask, 0)); + + SDNode *Shift = 0; + + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(ShiftAmt)) { + unsigned bytes = unsigned(CN->getZExtValue()) >> 3; + unsigned bits = unsigned(CN->getZExtValue()) & 7; + + if (bytes > 0) { + bytes = 31 - bytes; + Shift = + CurDAG->getMachineNode(SPU::ROTQBYIv2i64, dl, VecVT, + SDValue(UpperLowerSelect, 0), + CurDAG->getTargetConstant(bytes, ShiftAmtVT)); + } + + if (bits > 0) { + bits = 8 - bits; + Shift = + CurDAG->getMachineNode(SPU::ROTQBIIv2i64, dl, VecVT, + SDValue((Shift != 0 ? Shift : UpperLowerSelect), 0), + CurDAG->getTargetConstant(bits, ShiftAmtVT)); + } + } else { + SDNode *NegShift = + CurDAG->getMachineNode(SPU::SFIr32, dl, ShiftAmtVT, + ShiftAmt, CurDAG->getTargetConstant(0, ShiftAmtVT)); + + Shift = + CurDAG->getMachineNode(SPU::ROTQBYBIv2i64_r32, dl, VecVT, + SDValue(UpperLowerSelect, 0), SDValue(NegShift, 0)); + Shift = + CurDAG->getMachineNode(SPU::ROTQBIv2i64, dl, VecVT, + SDValue(Shift, 0), SDValue(NegShift, 0)); + } + + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(Shift, 0), getRC(MVT::i64)); +} + +/*! 
+ Do the necessary magic necessary to load a i64 constant + */ +SDNode *SPUDAGToDAGISel::SelectI64Constant(SDNode *N, EVT OpVT, + DebugLoc dl) { + ConstantSDNode *CN = cast<ConstantSDNode>(N); + return SelectI64Constant(CN->getZExtValue(), OpVT, dl); +} + +SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT, + DebugLoc dl) { + EVT OpVecVT = EVT::getVectorVT(*CurDAG->getContext(), OpVT, 2); + SDValue i64vec = + SPU::LowerV2I64Splat(OpVecVT, *CurDAG, Value64, dl); + + // Here's where it gets interesting, because we have to parse out the + // subtree handed back in i64vec: + + if (i64vec.getOpcode() == ISD::BITCAST) { + // The degenerate case where the upper and lower bits in the splat are + // identical: + SDValue Op0 = i64vec.getOperand(0); + + ReplaceUses(i64vec, Op0); + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + SDValue(emitBuildVector(Op0.getNode()), 0), + getRC(MVT::i64)); + } else if (i64vec.getOpcode() == SPUISD::SHUFB) { + SDValue lhs = i64vec.getOperand(0); + SDValue rhs = i64vec.getOperand(1); + SDValue shufmask = i64vec.getOperand(2); + + if (lhs.getOpcode() == ISD::BITCAST) { + ReplaceUses(lhs, lhs.getOperand(0)); + lhs = lhs.getOperand(0); + } + + SDNode *lhsNode = (lhs.getNode()->isMachineOpcode() + ? lhs.getNode() + : emitBuildVector(lhs.getNode())); + + if (rhs.getOpcode() == ISD::BITCAST) { + ReplaceUses(rhs, rhs.getOperand(0)); + rhs = rhs.getOperand(0); + } + + SDNode *rhsNode = (rhs.getNode()->isMachineOpcode() + ? rhs.getNode() + : emitBuildVector(rhs.getNode())); + + if (shufmask.getOpcode() == ISD::BITCAST) { + ReplaceUses(shufmask, shufmask.getOperand(0)); + shufmask = shufmask.getOperand(0); + } + + SDNode *shufMaskNode = (shufmask.getNode()->isMachineOpcode() + ? shufmask.getNode() + : emitBuildVector(shufmask.getNode())); + + SDValue shufNode = + CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT, + SDValue(lhsNode, 0), SDValue(rhsNode, 0), + SDValue(shufMaskNode, 0)); + HandleSDNode Dummy(shufNode); + SDNode *SN = SelectCode(Dummy.getValue().getNode()); + if (SN == 0) SN = Dummy.getValue().getNode(); + + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, + OpVT, SDValue(SN, 0), getRC(MVT::i64)); + } else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) { + return CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, dl, OpVT, + SDValue(emitBuildVector(i64vec.getNode()), 0), + getRC(MVT::i64)); + } else { + report_fatal_error("SPUDAGToDAGISel::SelectI64Constant: Unhandled i64vec" + "condition"); + } +} + +/// createSPUISelDag - This pass converts a legalized DAG into a +/// SPU-specific DAG, ready for instruction scheduling. +/// +FunctionPass *llvm::createSPUISelDag(SPUTargetMachine &TM) { + return new SPUDAGToDAGISel(TM); +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp new file mode 100644 index 000000000000..062374127e2f --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.cpp @@ -0,0 +1,3259 @@ +//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SPUTargetLowering class. 
+// +//===----------------------------------------------------------------------===// + +#include "SPUISelLowering.h" +#include "SPUTargetMachine.h" +#include "SPUFrameLowering.h" +#include "SPUMachineFunction.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/CallingConv.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + // Byte offset of the preferred slot (counted from the MSB) + int prefslotOffset(EVT VT) { + int retval=0; + if (VT==MVT::i1) retval=3; + if (VT==MVT::i8) retval=3; + if (VT==MVT::i16) retval=2; + + return retval; + } + + //! Expand a library call into an actual call DAG node + /*! + \note + This code is taken from SelectionDAGLegalize, since it is not exposed as + part of the LLVM SelectionDAG API. + */ + + SDValue + ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG, + bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) { + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + EVT ArgVT = Op.getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op.getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + Type *RetTy = + Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair<SDValue, SDValue> CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, Op.getDebugLoc()); + + return CallInfo.first; + } +} + +SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) + : TargetLowering(TM, new TargetLoweringObjectFileELF()), + SPUTM(TM) { + + // Use _setjmp/_longjmp instead of setjmp/longjmp. 
+ setUseUnderscoreSetJmp(true); + setUseUnderscoreLongJmp(true); + + // Set RTLIB libcall names as used by SPU: + setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); + + // Set up the SPU's register classes: + addRegisterClass(MVT::i8, SPU::R8CRegisterClass); + addRegisterClass(MVT::i16, SPU::R16CRegisterClass); + addRegisterClass(MVT::i32, SPU::R32CRegisterClass); + addRegisterClass(MVT::i64, SPU::R64CRegisterClass); + addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); + addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); + addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); + + // SPU has no sign or zero extended loads for i1, i8, i16: + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); + + setTruncStoreAction(MVT::i128, MVT::i64, Expand); + setTruncStoreAction(MVT::i128, MVT::i32, Expand); + setTruncStoreAction(MVT::i128, MVT::i16, Expand); + setTruncStoreAction(MVT::i128, MVT::i8, Expand); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // SPU constant load actions are custom lowered: + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Custom); + + // SPU's loads and stores have to be custom lowered: + for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; + ++sctype) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + setLoadExtAction(ISD::EXTLOAD, VT, Custom); + setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); + setLoadExtAction(ISD::SEXTLOAD, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } + } + + for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; + ++sctype) { + MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype; + + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::STORE, VT, Custom); + + for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { + MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype; + setTruncStoreAction(VT, StoreVT, Expand); + } + } + + // Expand the jumptable branches + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + + // Custom lower SELECT_CC for most cases, but expand by default + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); + + // SPU has no intrinsics for these particular operations: + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); + + // SPU has no division/remainder instructions + setOperationAction(ISD::SREM, MVT::i8, Expand); + setOperationAction(ISD::UREM, MVT::i8, Expand); + setOperationAction(ISD::SDIV, MVT::i8, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Expand); + setOperationAction(ISD::SDIVREM, MVT::i8, Expand); + setOperationAction(ISD::UDIVREM, MVT::i8, Expand); + setOperationAction(ISD::SREM, MVT::i16, Expand); + setOperationAction(ISD::UREM, MVT::i16, Expand); + 
setOperationAction(ISD::SDIV, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i16, Expand); + setOperationAction(ISD::SDIVREM, MVT::i16, Expand); + setOperationAction(ISD::UDIVREM, MVT::i16, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SREM, MVT::i128, Expand); + setOperationAction(ISD::UREM, MVT::i128, Expand); + setOperationAction(ISD::SDIV, MVT::i128, Expand); + setOperationAction(ISD::UDIV, MVT::i128, Expand); + setOperationAction(ISD::SDIVREM, MVT::i128, Expand); + setOperationAction(ISD::UDIVREM, MVT::i128, Expand); + + // We don't support sin/cos/sqrt/fmod + setOperationAction(ISD::FSIN , MVT::f64, Expand); + setOperationAction(ISD::FCOS , MVT::f64, Expand); + setOperationAction(ISD::FREM , MVT::f64, Expand); + setOperationAction(ISD::FSIN , MVT::f32, Expand); + setOperationAction(ISD::FCOS , MVT::f32, Expand); + setOperationAction(ISD::FREM , MVT::f32, Expand); + + // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt + // for f32!) + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSQRT, MVT::f32, Expand); + + setOperationAction(ISD::FMA, MVT::f64, Expand); + setOperationAction(ISD::FMA, MVT::f32, Expand); + + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + + // SPU can do rotate right and left, so legalize it... but customize for i8 + // because instructions don't exist. + + // FIXME: Change from "expand" to appropriate type once ROTR is supported in + // .td files. 
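+  // (Expanding ROTR here is still workable: the generic legalizer can rewrite
+  // a rotate-right either in terms of the legal rotate-left below or as a
+  // pair of shifts, so only ROTL needs native patterns for now.)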
+ setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); + setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); + setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); + + setOperationAction(ISD::ROTL, MVT::i32, Legal); + setOperationAction(ISD::ROTL, MVT::i16, Legal); + setOperationAction(ISD::ROTL, MVT::i8, Custom); + + // SPU has no native version of shift left/right for i8 + setOperationAction(ISD::SHL, MVT::i8, Custom); + setOperationAction(ISD::SRL, MVT::i8, Custom); + setOperationAction(ISD::SRA, MVT::i8, Custom); + + // Make these operations legal and handle them during instruction selection: + setOperationAction(ISD::SHL, MVT::i64, Legal); + setOperationAction(ISD::SRL, MVT::i64, Legal); + setOperationAction(ISD::SRA, MVT::i64, Legal); + + // Custom lower i8, i32 and i64 multiplications + setOperationAction(ISD::MUL, MVT::i8, Custom); + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MUL, MVT::i64, Legal); + + // Expand double-width multiplication + // FIXME: It would probably be reasonable to support some of these operations + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::MULHU, MVT::i16, Expand); + setOperationAction(ISD::MULHS, MVT::i16, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); + + // Need to custom handle (some) common i8, i64 math ops + setOperationAction(ISD::ADD, MVT::i8, Custom); + setOperationAction(ISD::ADD, MVT::i64, Legal); + setOperationAction(ISD::SUB, MVT::i8, Custom); + setOperationAction(ISD::SUB, MVT::i64, Legal); + + // SPU does not have BSWAP. It does have i32 support CTLZ. + // CTPOP has to be custom lowered. 
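+  // (The custom CTPOP lowering presumably relies on the SPU "cntb" count-ones
+  // -in-bytes instruction, exposed below as SPUISD::CNTB, followed by a
+  // horizontal sum of the per-byte counts.)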
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + + setOperationAction(ISD::CTPOP, MVT::i8, Custom); + setOperationAction(ISD::CTPOP, MVT::i16, Custom); + setOperationAction(ISD::CTPOP, MVT::i32, Custom); + setOperationAction(ISD::CTPOP, MVT::i64, Custom); + setOperationAction(ISD::CTPOP, MVT::i128, Expand); + + setOperationAction(ISD::CTTZ , MVT::i8, Expand); + setOperationAction(ISD::CTTZ , MVT::i16, Expand); + setOperationAction(ISD::CTTZ , MVT::i32, Expand); + setOperationAction(ISD::CTTZ , MVT::i64, Expand); + setOperationAction(ISD::CTTZ , MVT::i128, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i128, Expand); + + setOperationAction(ISD::CTLZ , MVT::i8, Promote); + setOperationAction(ISD::CTLZ , MVT::i16, Promote); + setOperationAction(ISD::CTLZ , MVT::i32, Legal); + setOperationAction(ISD::CTLZ , MVT::i64, Expand); + setOperationAction(ISD::CTLZ , MVT::i128, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i128, Expand); + + // SPU has a version of select that implements (a&~c)|(b&c), just like + // select ought to work: + setOperationAction(ISD::SELECT, MVT::i8, Legal); + setOperationAction(ISD::SELECT, MVT::i16, Legal); + setOperationAction(ISD::SELECT, MVT::i32, Legal); + setOperationAction(ISD::SELECT, MVT::i64, Legal); + + setOperationAction(ISD::SETCC, MVT::i8, Legal); + setOperationAction(ISD::SETCC, MVT::i16, Legal); + setOperationAction(ISD::SETCC, MVT::i32, Legal); + setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SETCC, MVT::f64, Custom); + + // Custom lower i128 -> i64 truncates + setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); + + // Custom lower i32/i64 -> i128 sign extend + setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom); + + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + // SPU has a legal FP -> signed INT instruction for f32, but for f64, need + // to expand to a libcall, hence the custom lowering: + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); + setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); + + // FDIV on SPU requires custom lowering + setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall + + // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + 
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + + setOperationAction(ISD::BITCAST, MVT::i32, Legal); + setOperationAction(ISD::BITCAST, MVT::f32, Legal); + setOperationAction(ISD::BITCAST, MVT::i64, Legal); + setOperationAction(ISD::BITCAST, MVT::f64, Legal); + + // We cannot sextinreg(i1). Expand to shifts. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // We want to legalize GlobalAddress and ConstantPool nodes into the + // appropriate instructions to materialize the address. + for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; + ++sctype) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype; + + setOperationAction(ISD::GlobalAddress, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::JumpTable, VT, Custom); + } + + // VASTART needs to be custom lowered to use the VarArgsFrameIndex + setOperationAction(ISD::VASTART , MVT::Other, Custom); + + // Use the default implementation. + setOperationAction(ISD::VAARG , MVT::Other, Expand); + setOperationAction(ISD::VACOPY , MVT::Other, Expand); + setOperationAction(ISD::VAEND , MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); + + // Cell SPU has instructions for converting between i64 and fp. + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); + + // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); + + // BUILD_PAIR can't be handled natively, and should be expanded to shl/or + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + + // First set operation action for all vector types to expand. Then we + // will selectively turn on ones that can be effectively codegen'd. + addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); + addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); + + for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)i; + + // Set operation actions to legal types only. + if (!isTypeLegal(VT)) continue; + + // add/sub are legal for all supported vector VT's. + setOperationAction(ISD::ADD, VT, Legal); + setOperationAction(ISD::SUB, VT, Legal); + // mul has to be custom lowered. 
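+    // (Note: the line below actually leaves vector MUL Legal; the custom
+    // multiply handling set up earlier applies to the scalar i8 case.)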
+ setOperationAction(ISD::MUL, VT, Legal); + + setOperationAction(ISD::AND, VT, Legal); + setOperationAction(ISD::OR, VT, Legal); + setOperationAction(ISD::XOR, VT, Legal); + setOperationAction(ISD::LOAD, VT, Custom); + setOperationAction(ISD::SELECT, VT, Legal); + setOperationAction(ISD::STORE, VT, Custom); + + // These operations need to be expanded: + setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::UREM, VT, Expand); + + // Expand all trunc stores + for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) { + MVT::SimpleValueType TargetVT = (MVT::SimpleValueType)j; + setTruncStoreAction(VT, TargetVT, Expand); + } + + // Custom lower build_vector, constant pool spills, insert and + // extract vector elements: + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::ConstantPool, VT, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + } + + setOperationAction(ISD::SHL, MVT::v2i64, Expand); + + setOperationAction(ISD::AND, MVT::v16i8, Custom); + setOperationAction(ISD::OR, MVT::v16i8, Custom); + setOperationAction(ISD::XOR, MVT::v16i8, Custom); + setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); + + setOperationAction(ISD::FDIV, MVT::v4f32, Legal); + + setBooleanContents(ZeroOrNegativeOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct? + + setStackPointerRegisterToSaveRestore(SPU::R1); + + // We have target-specific dag combine patterns for the following nodes: + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + + setMinFunctionAlignment(3); + + computeRegisterProperties(); + + // Set pre-RA register scheduler default to BURR, which produces slightly + // better code than the default (could also be TDRR, but TargetLowering.h + // needs a mod to support that model): + setSchedulingPreference(Sched::RegPressure); +} + +const char *SPUTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case SPUISD::RET_FLAG: return "SPUISD::RET_FLAG"; + case SPUISD::Hi: return "SPUISD::Hi"; + case SPUISD::Lo: return "SPUISD::Lo"; + case SPUISD::PCRelAddr: return "SPUISD::PCRelAddr"; + case SPUISD::AFormAddr: return "SPUISD::AFormAddr"; + case SPUISD::IndirectAddr: return "SPUISD::IndirectAddr"; + case SPUISD::LDRESULT: return "SPUISD::LDRESULT"; + case SPUISD::CALL: return "SPUISD::CALL"; + case SPUISD::SHUFB: return "SPUISD::SHUFB"; + case SPUISD::SHUFFLE_MASK: return "SPUISD::SHUFFLE_MASK"; + case SPUISD::CNTB: return "SPUISD::CNTB"; + case SPUISD::PREFSLOT2VEC: return "SPUISD::PREFSLOT2VEC"; + case SPUISD::VEC2PREFSLOT: return "SPUISD::VEC2PREFSLOT"; + case SPUISD::SHL_BITS: return "SPUISD::SHL_BITS"; + case SPUISD::SHL_BYTES: return "SPUISD::SHL_BYTES"; + case SPUISD::VEC_ROTL: return "SPUISD::VEC_ROTL"; + case SPUISD::VEC_ROTR: return "SPUISD::VEC_ROTR"; + case SPUISD::ROTBYTES_LEFT: return "SPUISD::ROTBYTES_LEFT"; + case SPUISD::ROTBYTES_LEFT_BITS: return "SPUISD::ROTBYTES_LEFT_BITS"; + case SPUISD::SELECT_MASK: return "SPUISD::SELECT_MASK"; + case SPUISD::SELB: return "SPUISD::SELB"; + case SPUISD::ADD64_MARKER: return 
"SPUISD::ADD64_MARKER"; + case SPUISD::SUB64_MARKER: return "SPUISD::SUB64_MARKER"; + case SPUISD::MUL64_MARKER: return "SPUISD::MUL64_MARKER"; + } +} + +//===----------------------------------------------------------------------===// +// Return the Cell SPU's SETCC result type +//===----------------------------------------------------------------------===// + +EVT SPUTargetLowering::getSetCCResultType(EVT VT) const { + // i8, i16 and i32 are valid SETCC result types + MVT::SimpleValueType retval; + + switch(VT.getSimpleVT().SimpleTy){ + case MVT::i1: + case MVT::i8: + retval = MVT::i8; break; + case MVT::i16: + retval = MVT::i16; break; + case MVT::i32: + default: + retval = MVT::i32; + } + return retval; +} + +//===----------------------------------------------------------------------===// +// Calling convention code: +//===----------------------------------------------------------------------===// + +#include "SPUGenCallingConv.inc" + +//===----------------------------------------------------------------------===// +// LowerOperation implementation +//===----------------------------------------------------------------------===// + +/// Custom lower loads for CellSPU +/*! + All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements + within a 16-byte block, we have to rotate to extract the requested element. + + For extending loads, we also want to ensure that the following sequence is + emitted, e.g. for MVT::f32 extending load to MVT::f64: + +\verbatim +%1 v16i8,ch = load +%2 v16i8,ch = rotate %1 +%3 v4f8, ch = bitconvert %2 +%4 f32 = vec2perfslot %3 +%5 f64 = fp_extend %4 +\endverbatim +*/ +static SDValue +LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + LoadSDNode *LN = cast<LoadSDNode>(Op); + SDValue the_chain = LN->getChain(); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + EVT InVT = LN->getMemoryVT(); + EVT OutVT = Op.getValueType(); + ISD::LoadExtType ExtType = LN->getExtensionType(); + unsigned alignment = LN->getAlignment(); + int pso = prefslotOffset(InVT); + DebugLoc dl = Op.getDebugLoc(); + EVT vecVT = InVT.isVector()? 
InVT: EVT::getVectorVT(*DAG.getContext(), InVT, + (128 / InVT.getSizeInBits())); + + // two sanity checks + assert( LN->getAddressingMode() == ISD::UNINDEXED + && "we should get only UNINDEXED adresses"); + // clean aligned loads can be selected as-is + if (InVT.getSizeInBits() == 128 && (alignment%16) == 0) + return SDValue(); + + // Get pointerinfos to the memory chunk(s) that contain the data to load + uint64_t mpi_offset = LN->getPointerInfo().Offset; + mpi_offset -= mpi_offset%16; + MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset); + MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16); + + SDValue result; + SDValue basePtr = LN->getBasePtr(); + SDValue rotate; + + if ((alignment%16) == 0) { + ConstantSDNode *CN; + + // Special cases for a known aligned load to simplify the base pointer + // and the rotation amount: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + int64_t rotamt = int64_t((offset & 0xf) - pso); + + if (rotamt < 0) + rotamt += 16; + + rotate = DAG.getConstant(rotamt, MVT::i16); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) + || (basePtr.getOpcode() == SPUISD::IndirectAddr + && basePtr.getOperand(0).getOpcode() == SPUISD::Hi + && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { + // Plain aligned a-form address: rotate into preferred slot + // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) + int64_t rotamt = -pso; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getConstant(rotamt, MVT::i16); + } else { + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + int64_t rotamt = -pso; + if (rotamt < 0) + rotamt += 16; + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(rotamt, PtrVT)); + } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa<ConstantSDNode>(Op1)) { + // Convert the (add <ptr>, <const>) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add <arg1>, <arg2>) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. 
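+        // (For reference: on SPU, a-form is an absolute address, d-form is a
+        // small displacement off a register, and x-form is register+register;
+        // SPUISD::IndirectAddr is the node that instruction selection later
+        // turns into d-form or x-form addresses.)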
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + + // Offset the rotate amount by the basePtr and the preferred slot + // byte offset + rotate = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(-pso, PtrVT)); + } + + // Do the load as a i128 to allow possible shifting + SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr, + lowMemPtr, + LN->isVolatile(), LN->isNonTemporal(), false, 16); + + // When the size is not greater than alignment we get all data with just + // one load + if (alignment >= InVT.getSizeInBits()/8) { + // Update the chain + the_chain = low.getValue(1); + + // Rotate into the preferred slot: + result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128, + low.getValue(0), rotate); + + // Convert the loaded v16i8 vector to the appropriate vector type + // specified by the operand: + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + InVT, (128 / InVT.getSizeInBits())); + result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, + DAG.getNode(ISD::BITCAST, dl, vecVT, result)); + } + // When alignment is less than the size, we might need (known only at + // run-time) two loads + // TODO: if the memory address is composed only from constants, we have + // extra kowledge, and might avoid the second load + else { + // storage position offset from lower 16 byte aligned memory chunk + SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, + basePtr, DAG.getConstant( 0xf, MVT::i32 ) ); + // get a registerfull of ones. (this implementation is a workaround: LLVM + // cannot handle 128 bit signed int constants) + SDValue ones = DAG.getConstant(-1, MVT::v4i32 ); + ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); + + SDValue high = DAG.getLoad(MVT::i128, dl, the_chain, + DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(16, PtrVT)), + highMemPtr, + LN->isVolatile(), LN->isNonTemporal(), false, + 16); + + the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), + high.getValue(1)); + + // Shift the (possible) high part right to compensate the misalignemnt. + // if there is no highpart (i.e. value is i64 and offset is 4), this + // will zero out the high value. + high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high, + DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + offset + )); + + // Shift the low similarly + // TODO: add SPUISD::SHL_BYTES + low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); + + // Merge the two parts + result = DAG.getNode(ISD::BITCAST, dl, vecVT, + DAG.getNode(ISD::OR, dl, MVT::i128, low, high)); + + if (!InVT.isVector()) { + result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result ); + } + + } + // Handle extending loads by extending the scalar result: + if (ExtType == ISD::SEXTLOAD) { + result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result); + } else if (ExtType == ISD::ZEXTLOAD) { + result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result); + } else if (ExtType == ISD::EXTLOAD) { + unsigned NewOpc = ISD::ANY_EXTEND; + + if (OutVT.isFloatingPoint()) + NewOpc = ISD::FP_EXTEND; + + result = DAG.getNode(NewOpc, dl, OutVT, result); + } + + SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); + SDValue retops[2] = { + result, + the_chain + }; + + result = DAG.getNode(SPUISD::LDRESULT, dl, retvts, + retops, sizeof(retops) / sizeof(retops[0])); + return result; +} + +/// Custom lower stores for CellSPU +/*! 
+ All CellSPU stores are aligned to 16-byte boundaries, so for elements + within a 16-byte block, we have to generate a shuffle to insert the + requested element into its place, then store the resulting block. + */ +static SDValue +LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + StoreSDNode *SN = cast<StoreSDNode>(Op); + SDValue Value = SN->getValue(); + EVT VT = Value.getValueType(); + EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + DebugLoc dl = Op.getDebugLoc(); + unsigned alignment = SN->getAlignment(); + SDValue result; + EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT, + (128 / StVT.getSizeInBits())); + // Get pointerinfos to the memory chunk(s) that contain the data to load + uint64_t mpi_offset = SN->getPointerInfo().Offset; + mpi_offset -= mpi_offset%16; + MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset); + MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16); + + + // two sanity checks + assert( SN->getAddressingMode() == ISD::UNINDEXED + && "we should get only UNINDEXED adresses"); + // clean aligned loads can be selected as-is + if (StVT.getSizeInBits() == 128 && (alignment%16) == 0) + return SDValue(); + + SDValue alignLoadVec; + SDValue basePtr = SN->getBasePtr(); + SDValue the_chain = SN->getChain(); + SDValue insertEltOffs; + + if ((alignment%16) == 0) { + ConstantSDNode *CN; + // Special cases for a known aligned load to simplify the base pointer + // and insertion byte: + if (basePtr.getOpcode() == ISD::ADD + && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { + // Known offset into basePtr + int64_t offset = CN->getSExtValue(); + + // Simplify the base pointer for this case: + basePtr = basePtr.getOperand(0); + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & 0xf), PtrVT)); + + if ((offset & ~0xf) > 0) { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant((offset & ~0xf), PtrVT)); + } + } else { + // Otherwise, assume it's at byte 0 of basePtr + insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + } else { + // Unaligned load: must be more pessimistic about addressing modes: + if (basePtr.getOpcode() == ISD::ADD) { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + SDValue Flag; + + SDValue Op0 = basePtr.getOperand(0); + SDValue Op1 = basePtr.getOperand(1); + + if (isa<ConstantSDNode>(Op1)) { + // Convert the (add <ptr>, <const>) to an indirect address contained + // in a register. Note that this is done because we need to avoid + // creating a 0(reg) d-form address due to the SPU's block loads. + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag); + basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT); + } else { + // Convert the (add <arg1>, <arg2>) to an indirect address, which + // will likely be lowered as a reg(reg) x-form address. 
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1); + } + } else { + basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + + // Insertion point is solely determined by basePtr's contents + insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT, + basePtr, + DAG.getConstant(0, PtrVT)); + } + + // Load the lower part of the memory to which to store. + SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, + lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), + false, 16); + + // if we don't need to store over the 16 byte boundary, one store suffices + if (alignment >= StVT.getSizeInBits()/8) { + // Update the chain + the_chain = low.getValue(1); + + LoadSDNode *LN = cast<LoadSDNode>(low); + SDValue theValue = SN->getValue(); + + if (StVT != VT + && (theValue.getOpcode() == ISD::AssertZext + || theValue.getOpcode() == ISD::AssertSext)) { + // Drill down and get the value for zero- and sign-extended + // quantities + theValue = theValue.getOperand(0); + } + + // If the base pointer is already a D-form address, then just create + // a new D-form address with a slot offset and the orignal base pointer. + // Otherwise generate a D-form address with the slot offset relative + // to the stack pointer, which is always aligned. +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "CellSPU LowerSTORE: basePtr = "; + basePtr.getNode()->dump(&DAG); + errs() << "\n"; + } +#endif + + SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, + insertEltOffs); + SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, + theValue); + + result = DAG.getNode(SPUISD::SHUFB, dl, vecVT, + vectorizeOp, low, + DAG.getNode(ISD::BITCAST, dl, + MVT::v4i32, insertEltOp)); + + result = DAG.getStore(the_chain, dl, result, basePtr, + lowMemPtr, + LN->isVolatile(), LN->isNonTemporal(), + 16); + + } + // do the store when it might cross the 16 byte memory access boundary. + else { + // TODO issue a warning if SN->isVolatile()== true? This is likely not + // what the user wanted. + + // address offset from nearest lower 16byte alinged address + SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32, + SN->getBasePtr(), + DAG.getConstant(0xf, MVT::i32)); + // 16 - offset + SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + offset); + // 16 - sizeof(Value) + SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant( 16, MVT::i32), + DAG.getConstant( VT.getSizeInBits()/8, + MVT::i32)); + // get a registerfull of ones + SDValue ones = DAG.getConstant(-1, MVT::v4i32); + ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones); + + // Create the 128 bit masks that have ones where the data to store is + // located. + SDValue lowmask, himask; + // if the value to store don't fill up the an entire 128 bits, zero + // out the last bits of the mask so that only the value we want to store + // is masked. + // this is e.g. 
in the case of store i32, align 2 + if (!VT.isVector()){ + Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value); + lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus); + lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, + surplus); + Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); + Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask); + + } + else { + lowmask = ones; + Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value); + } + // this will zero, if there are no data that goes to the high quad + himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask, + offset_compl); + lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask, + offset); + + // Load in the old data and zero out the parts that will be overwritten with + // the new data to store. + SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain, + DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, + DAG.getConstant( 16, PtrVT)), + highMemPtr, + SN->isVolatile(), SN->isNonTemporal(), + false, 16); + the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), + hi.getValue(1)); + + low = DAG.getNode(ISD::AND, dl, MVT::i128, + DAG.getNode( ISD::BITCAST, dl, MVT::i128, low), + DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones)); + hi = DAG.getNode(ISD::AND, dl, MVT::i128, + DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi), + DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones)); + + // Shift the Value to store into place. rlow contains the parts that go to + // the lower memory chunk, rhi has the parts that go to the upper one. + SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset); + rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask); + SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value, + offset_compl); + + // Merge the old data and the new data and store the results + // Need to convert vectors here to integer as 'OR'ing floats assert + rlow = DAG.getNode(ISD::OR, dl, MVT::i128, + DAG.getNode(ISD::BITCAST, dl, MVT::i128, low), + DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow)); + rhi = DAG.getNode(ISD::OR, dl, MVT::i128, + DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi), + DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi)); + + low = DAG.getStore(the_chain, dl, rlow, basePtr, + lowMemPtr, + SN->isVolatile(), SN->isNonTemporal(), 16); + hi = DAG.getStore(the_chain, dl, rhi, + DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, + DAG.getConstant( 16, PtrVT)), + highMemPtr, + SN->isVolatile(), SN->isNonTemporal(), 16); + result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0), + hi.getValue(0)); + } + + return result; +} + +//! Generate the address of a constant pool entry. +static SDValue +LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + const Constant *C = CP->getConstVal(); + SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); + SDValue Zero = DAG.getConstant(0, PtrVT); + const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); + + if (TM.getRelocationModel() == Reloc::Static) { + if (!ST->usingLargeMem()) { + // Just return the SDValue with the constant pool address in it. 
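+      // (Small-memory model: the constant pool entry is reachable through an
+      // absolute 18-bit a-form address, so no hi/lo address pair is needed.)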
+ return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero); + } else { + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); + } + } + + llvm_unreachable("LowerConstantPool: Relocation model other than static" + " not supported."); +} + +//! Alternate entry point for generating the address of a constant pool entry +SDValue +SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) { + return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl()); +} + +static SDValue +LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + SDValue Zero = DAG.getConstant(0, PtrVT); + const TargetMachine &TM = DAG.getTarget(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); + + if (TM.getRelocationModel() == Reloc::Static) { + if (!ST->usingLargeMem()) { + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero); + } else { + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); + } + } + + llvm_unreachable("LowerJumpTable: Relocation model other than static" + " not supported."); +} + +static SDValue +LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { + EVT PtrVT = Op.getValueType(); + GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); + const GlobalValue *GV = GSDN->getGlobal(); + SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + PtrVT, GSDN->getOffset()); + const TargetMachine &TM = DAG.getTarget(); + SDValue Zero = DAG.getConstant(0, PtrVT); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); + + if (TM.getRelocationModel() == Reloc::Static) { + if (!ST->usingLargeMem()) { + return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero); + } else { + SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero); + SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero); + return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo); + } + } else { + report_fatal_error("LowerGlobalAddress: Relocation model other than static" + "not supported."); + /*NOTREACHED*/ + } +} + +//! 
Custom lower double precision floating point constants +static SDValue +LowerConstantFP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + // FIXME there is no actual debug info here + DebugLoc dl = Op.getDebugLoc(); + + if (VT == MVT::f64) { + ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode()); + + assert((FP != 0) && + "LowerConstantFP: Node is not ConstantFPSDNode"); + + uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); + SDValue T = DAG.getConstant(dbits, MVT::i64); + SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec)); + } + + return SDValue(); +} + +SDValue +SPUTargetLowering::LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) + const { + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>(); + + unsigned ArgOffset = SPUFrameLowering::minStackSize(); + unsigned ArgRegIdx = 0; + unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); + + // Add DAG nodes to load the arguments or copy them out of registers. + for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { + EVT ObjectVT = Ins[ArgNo].VT; + unsigned ObjSize = ObjectVT.getSizeInBits()/8; + SDValue ArgVal; + CCValAssign &VA = ArgLocs[ArgNo]; + + if (VA.isRegLoc()) { + const TargetRegisterClass *ArgRegClass; + + switch (ObjectVT.getSimpleVT().SimpleTy) { + default: + report_fatal_error("LowerFormalArguments Unhandled argument type: " + + Twine(ObjectVT.getEVTString())); + case MVT::i8: + ArgRegClass = &SPU::R8CRegClass; + break; + case MVT::i16: + ArgRegClass = &SPU::R16CRegClass; + break; + case MVT::i32: + ArgRegClass = &SPU::R32CRegClass; + break; + case MVT::i64: + ArgRegClass = &SPU::R64CRegClass; + break; + case MVT::i128: + ArgRegClass = &SPU::GPRCRegClass; + break; + case MVT::f32: + ArgRegClass = &SPU::R32FPRegClass; + break; + case MVT::f64: + ArgRegClass = &SPU::R64FPRegClass; + break; + case MVT::v2f64: + case MVT::v4f32: + case MVT::v2i64: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + ArgRegClass = &SPU::VECREGRegClass; + break; + } + + unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); + RegInfo.addLiveIn(VA.getLocReg(), VReg); + ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); + ++ArgRegIdx; + } else { + // We need to load the argument to a virtual register if we determined + // above that we ran out of physical registers of the appropriate type + // or we're forced to do vararg + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); + SDValue FIN = DAG.getFrameIndex(FI, PtrVT); + ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), + false, false, false, 0); + ArgOffset += StackSlotSize; + } + + InVals.push_back(ArgVal); + // Update the chain + Chain = ArgVal.getOperand(0); + } + + // vararg handling: + if (isVarArg) { + // FIXME: we should be able to query the argument registers from + // tablegen 
generated code. + static const uint16_t ArgRegs[] = { + SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, + SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, + SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, + SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, + SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, + SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, + SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, + SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, + SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, + SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, + SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 + }; + // size of ArgRegs array + const unsigned NumArgRegs = 77; + + // We will spill (79-3)+1 registers to the stack + SmallVector<SDValue, 79-3+1> MemOps; + + // Create the frame slot + for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { + FuncInfo->setVarArgsFrameIndex( + MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); + SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass); + SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); + SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), + false, false, 0); + Chain = Store.getOperand(0); + MemOps.push_back(Store); + + // Increment address by stack slot size for the next stored argument + ArgOffset += StackSlotSize; + } + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } + + return Chain; +} + +/// isLSAAddress - Return the immediate to use if the specified +/// value is representable as a LSA address. +static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + if (!C) return 0; + + int Addr = C->getZExtValue(); + if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. + (Addr << 14 >> 14) != Addr) + return 0; // Top 14 bits have to be sext of immediate. + + return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); +} + +SDValue +SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool doesNotRet, bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + // CellSPU target does not yet support tail call optimization. + isTailCall = false; + + const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); + unsigned NumOps = Outs.size(); + unsigned StackSlotSize = SPUFrameLowering::stackSlotSize(); + + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), ArgLocs, *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); + + const unsigned NumArgRegs = ArgLocs.size(); + + + // Handy pointer type + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // Set up a copy of the stack pointer for use loading and storing any + // arguments that may not fit in the registers available for argument + // passing. 
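+  // (R1 is the SPU stack pointer, the same register registered earlier via
+  // setStackPointerRegisterToSaveRestore.)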
+ SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); + + // Figure out which arguments are going to go in registers, and which in + // memory. + unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR] + unsigned ArgRegIdx = 0; + + // Keep track of registers passing arguments + std::vector<std::pair<unsigned, SDValue> > RegsToPass; + // And the arguments passed on the stack + SmallVector<SDValue, 8> MemOpChains; + + for (; ArgRegIdx != NumOps; ++ArgRegIdx) { + SDValue Arg = OutVals[ArgRegIdx]; + CCValAssign &VA = ArgLocs[ArgRegIdx]; + + // PtrOff will be used to store the current argument to the stack if a + // register cannot be found for it. + SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); + PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); + + switch (Arg.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected ValueType for argument!"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + case MVT::i128: + case MVT::f32: + case MVT::f64: + case MVT::v2i64: + case MVT::v2f64: + case MVT::v4f32: + case MVT::v4i32: + case MVT::v8i16: + case MVT::v16i8: + if (ArgRegIdx != NumArgRegs) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else { + MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo(), + false, false, 0)); + ArgOffset += StackSlotSize; + } + break; + } + } + + // Accumulate how many bytes are to be pushed on the stack, including the + // linkage area, and parameter passing area. According to the SPU ABI, + // we minimally need space for [LR] and [SP]. + unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize(); + + // Insert a call sequence start + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, + true)); + + if (!MemOpChains.empty()) { + // Adjust the stack pointer for the stack arguments. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + } + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + SmallVector<SDValue, 8> Ops; + unsigned CallOpc = SPUISD::CALL; + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + EVT CalleeVT = Callee.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); + + if (!ST->usingLargeMem()) { + // Turn calls to targets that are defined (i.e., have bodies) into BRSL + // style calls, otherwise, external symbols are BRASL calls. This assumes + // that declared/defined symbols are in the same compilation unit and can + // be reached through PC-relative jumps. + // + // NOTE: + // This may be an unsafe assumption for JIT and really large compilation + // units. 
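+      // (Declarations therefore get an absolute a-form callee address, i.e. a
+      // BRASL-style call, while locally defined functions get a PC-relative
+      // address, i.e. a BRSL-style call.)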
+ if (GV->isDeclaration()) { + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); + } else { + Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); + } + } else { + // "Large memory" mode: Turn all calls into indirect calls with a X-form + // address pairs: + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); + } + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + EVT CalleeVT = Callee.getValueType(); + SDValue Zero = DAG.getConstant(0, PtrVT); + SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), + Callee.getValueType()); + + if (!ST->usingLargeMem()) { + Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); + } else { + Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); + } + } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { + // If this is an absolute destination address that appears to be a legal + // local store address, use the munged value. + Callee = SDValue(Dest, 0); + } + + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.getNode()) + Ops.push_back(InFlag); + // Returns a chain and a flag for retval copy to use. + Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue), + &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (!Ins.empty()) + InFlag = Chain.getValue(1); + + // If the function returns void, just return the chain. + if (Ins.empty()) + return Chain; + + // Now handle the return value(s) + SmallVector<CCValAssign, 16> RVLocs; + CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU); + + + // If the call has results, copy the values out of the ret val registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + InVals.push_back(Val); + } + + return Chain; +} + +SDValue +SPUTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const { + + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), + getTargetMachine(), RVLocs, *DAG.getContext()); + CCInfo.AnalyzeReturn(Outs, RetCC_SPU); + + // If this is the first return lowered for this function, add the regs to the + // liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + // Copy the result values into the output registers. 
+ for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + OutVals[i], Flag); + Flag = Chain.getValue(1); + } + + if (Flag.getNode()) + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + else + return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); +} + + +//===----------------------------------------------------------------------===// +// Vector related lowering: +//===----------------------------------------------------------------------===// + +static ConstantSDNode * +getVecImm(SDNode *N) { + SDValue OpVal(0, 0); + + // Check to see if this buildvec has a single non-undef value in its elements. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (OpVal.getNode() == 0) + OpVal = N->getOperand(i); + else if (OpVal != N->getOperand(i)) + return 0; + } + + if (OpVal.getNode() != 0) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { + return CN; + } + } + + return 0; +} + +/// get_vec_i18imm - Test if this vector is a vector filled with the same value +/// and the value fits into an unsigned 18-bit constant, and if so, return the +/// constant +SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + uint64_t Value = CN->getZExtValue(); + if (ValueType == MVT::i64) { + uint64_t UValue = CN->getZExtValue(); + uint32_t upper = uint32_t(UValue >> 32); + uint32_t lower = uint32_t(UValue); + if (upper != lower) + return SDValue(); + Value = Value >> 32; + } + if (Value <= 0x3ffff) + return DAG.getTargetConstant(Value, ValueType); + } + + return SDValue(); +} + +/// get_vec_i16imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 16-bit constant, and if so, return the +/// constant +SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + int64_t Value = CN->getSExtValue(); + if (ValueType == MVT::i64) { + uint64_t UValue = CN->getZExtValue(); + uint32_t upper = uint32_t(UValue >> 32); + uint32_t lower = uint32_t(UValue); + if (upper != lower) + return SDValue(); + Value = Value >> 32; + } + if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) { + return DAG.getTargetConstant(Value, ValueType); + } + } + + return SDValue(); +} + +/// get_vec_i10imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 10-bit constant, and if so, return the +/// constant +SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + int64_t Value = CN->getSExtValue(); + if (ValueType == MVT::i64) { + uint64_t UValue = CN->getZExtValue(); + uint32_t upper = uint32_t(UValue >> 32); + uint32_t lower = uint32_t(UValue); + if (upper != lower) + return SDValue(); + Value = Value >> 32; + } + if (isInt<10>(Value)) + return DAG.getTargetConstant(Value, ValueType); + } + + return SDValue(); +} + +/// get_vec_i8imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 8-bit constant, and if so, return the +/// constant. +/// +/// @note: The incoming vector is v16i8 because that's the only way we can load +/// constant vectors. Thus, we test to see if the upper and lower bytes are the +/// same value. 
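+///
+/// (Illustrative example, not from the original comment: a halfword splat of
+/// 0x5a5a has equal upper and lower bytes, so it is accepted and returned as
+/// the i8 immediate 0x5a.)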
+SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + int Value = (int) CN->getZExtValue(); + if (ValueType == MVT::i16 + && Value <= 0xffff /* truncated from uint64_t */ + && ((short) Value >> 8) == ((short) Value & 0xff)) + return DAG.getTargetConstant(Value & 0xff, ValueType); + else if (ValueType == MVT::i8 + && (Value & 0xff) == Value) + return DAG.getTargetConstant(Value, ValueType); + } + + return SDValue(); +} + +/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value +/// and the value fits into a signed 16-bit constant, and if so, return the +/// constant +SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType) { + if (ConstantSDNode *CN = getVecImm(N)) { + uint64_t Value = CN->getZExtValue(); + if ((ValueType == MVT::i32 + && ((unsigned) Value & 0xffff0000) == (unsigned) Value) + || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) + return DAG.getTargetConstant(Value >> 16, ValueType); + } + + return SDValue(); +} + +/// get_v4i32_imm - Catch-all for general 32-bit constant vectors +SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { + if (ConstantSDNode *CN = getVecImm(N)) { + return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); + } + + return SDValue(); +} + +/// get_v4i32_imm - Catch-all for general 64-bit constant vectors +SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { + if (ConstantSDNode *CN = getVecImm(N)) { + return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); + } + + return SDValue(); +} + +//! Lower a BUILD_VECTOR instruction creatively: +static SDValue +LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = Op.getDebugLoc(); + BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); + unsigned minSplatBits = EltVT.getSizeInBits(); + + if (minSplatBits < 16) + minSplatBits = 16; + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + || minSplatBits < SplatBitSize) + return SDValue(); // Wasn't a constant vector or splat exceeded min + + uint64_t SplatBits = APSplatBits.getZExtValue(); + + switch (VT.getSimpleVT().SimpleTy) { + default: + report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + + Twine(VT.getEVTString())); + /*NOTREACHED*/ + case MVT::v4f32: { + uint32_t Value32 = uint32_t(SplatBits); + assert(SplatBitSize == 32 + && "LowerBUILD_VECTOR: Unexpected floating point vector element."); + // NOTE: pretend the constant is an integer. LLVM won't load FP constants + SDValue T = DAG.getConstant(Value32, MVT::i32); + return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); + } + case MVT::v2f64: { + uint64_t f64val = uint64_t(SplatBits); + assert(SplatBitSize == 64 + && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); + // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants + SDValue T = DAG.getConstant(f64val, MVT::i64); + return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); + } + case MVT::v16i8: { + // 8-bit constants have to be expanded to 16-bits + unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; + SmallVector<SDValue, 8> Ops; + + Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); + } + case MVT::v8i16: { + unsigned short Value16 = SplatBits; + SDValue T = DAG.getConstant(Value16, EltVT); + SmallVector<SDValue, 8> Ops; + + Ops.assign(8, T); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + } + case MVT::v4i32: { + SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); + } + case MVT::v2i64: { + return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); + } + } +} + +/*! + */ +SDValue +SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, + DebugLoc dl) { + uint32_t upper = uint32_t(SplatVal >> 32); + uint32_t lower = uint32_t(SplatVal); + + if (upper == lower) { + // Magic constant that can be matched by IL, ILA, et. al. + SDValue Val = DAG.getTargetConstant(upper, MVT::i32); + return DAG.getNode(ISD::BITCAST, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + Val, Val, Val, Val)); + } else { + bool upper_special, lower_special; + + // NOTE: This code creates common-case shuffle masks that can be easily + // detected as common expressions. It is not attempting to create highly + // specialized masks to replace any and all 0's, 0xff's and 0x80's. + + // Detect if the upper or lower half is a special shuffle mask pattern: + upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); + lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); + + // Both upper and lower are special, lower to a constant pool load: + if (lower_special && upper_special) { + SDValue UpperVal = DAG.getConstant(upper, MVT::i32); + SDValue LowerVal = DAG.getConstant(lower, MVT::i32); + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + UpperVal, LowerVal, UpperVal, LowerVal); + return DAG.getNode(ISD::BITCAST, dl, OpVT, BV); + } + + SDValue LO32; + SDValue HI32; + SmallVector<SDValue, 16> ShufBytes; + SDValue Result; + + // Create lower vector if not a special pattern + if (!lower_special) { + SDValue LO32C = DAG.getConstant(lower, MVT::i32); + LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + LO32C, LO32C, LO32C, LO32C)); + } + + // Create upper vector if not a special pattern + if (!upper_special) { + SDValue HI32C = DAG.getConstant(upper, MVT::i32); + HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + HI32C, HI32C, HI32C, HI32C)); + } + + // If either upper or lower are special, then the two input operands are + // the same (basically, one of them is a "don't care") + if (lower_special) + LO32 = HI32; + if (upper_special) + HI32 = LO32; + + for (int i = 0; i < 4; ++i) { + uint64_t val = 0; + for (int j = 0; j < 4; ++j) { + SDValue V; + bool process_upper, process_lower; + val <<= 8; + process_upper = (upper_special && (i & 1) == 0); + process_lower = (lower_special && (i & 1) == 1); + + if (process_upper || process_lower) { + if ((process_upper && upper == 0) + || (process_lower && lower == 0)) + val |= 0x80; + else if 
((process_upper && upper == 0xffffffff) + || (process_lower && lower == 0xffffffff)) + val |= 0xc0; + else if ((process_upper && upper == 0x80000000) + || (process_lower && lower == 0x80000000)) + val |= (j == 0 ? 0xe0 : 0x80); + } else + val |= i * 4 + j + ((i & 1) * 16); + } + + ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); + } + + return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufBytes[0], ShufBytes.size())); + } +} + +/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on +/// which the Cell can operate. The code inspects V3 to ascertain whether the +/// permutation vector, V3, is monotonically increasing with one "exception" +/// element, e.g., (0, 1, _, 3). If this is the case, then generate a +/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. +/// In either case, the net result is going to eventually invoke SHUFB to +/// permute/shuffle the bytes from V1 and V2. +/// \note +/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate +/// control word for byte/halfword/word insertion. This takes care of a single +/// element move from V2 into V1. +/// \note +/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions. +static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + + if (V2.getOpcode() == ISD::UNDEF) V2 = V1; + + // If we have a single element being moved from V1 to V2, this can be handled + // using the C*[DX] compute mask instructions, but the vector elements have + // to be monotonically increasing with one exception element, and the source + // slot of the element to move must be the same as the destination. + EVT VecVT = V1.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + unsigned EltsFromV2 = 0; + unsigned V2EltOffset = 0; + unsigned V2EltIdx0 = 0; + unsigned CurrElt = 0; + unsigned MaxElts = VecVT.getVectorNumElements(); + unsigned PrevElt = 0; + bool monotonic = true; + bool rotate = true; + int rotamt=0; + EVT maskVT; // which of the c?d instructions to use + + if (EltVT == MVT::i8) { + V2EltIdx0 = 16; + maskVT = MVT::v16i8; + } else if (EltVT == MVT::i16) { + V2EltIdx0 = 8; + maskVT = MVT::v8i16; + } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { + V2EltIdx0 = 4; + maskVT = MVT::v4i32; + } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { + V2EltIdx0 = 2; + maskVT = MVT::v2i64; + } else + llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); + + for (unsigned i = 0; i != MaxElts; ++i) { + if (SVN->getMaskElt(i) < 0) + continue; + + unsigned SrcElt = SVN->getMaskElt(i); + + if (monotonic) { + if (SrcElt >= V2EltIdx0) { + // TODO: optimize for the monotonic case when several consecutive + // elements are taken form V2. Do we ever get such a case? 
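        // Illustrative examples (assumptions, not from the original comments):
        // for v4i32, the mask <0, 1, 6, 3> is monotonic with a single element
        // taken from V2 (mask value 6 is V2's element 2, landing in slot 2),
        // so one C*D insertion mask suffices; the mask <1, 2, 3, 0> is a pure
        // rotate by one element and is handled by the ROTBYTES_LEFT path below.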
+ if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0)) + V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8); + else + monotonic = false; + ++EltsFromV2; + } else if (CurrElt != SrcElt) { + monotonic = false; + } + + ++CurrElt; + } + + if (rotate) { + if (PrevElt > 0 && SrcElt < MaxElts) { + if ((PrevElt == SrcElt - 1) + || (PrevElt == MaxElts - 1 && SrcElt == 0)) { + PrevElt = SrcElt; + } else { + rotate = false; + } + } else if (i == 0 || (PrevElt==0 && SrcElt==1)) { + // First time or after a "wrap around" + rotamt = SrcElt-i; + PrevElt = SrcElt; + } else { + // This isn't a rotation, takes elements from vector 2 + rotate = false; + } + } + } + + if (EltsFromV2 == 1 && monotonic) { + // Compute mask and shuffle + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + + // As SHUFFLE_MASK becomes a c?d instruction, feed it an address + // R1 ($sp) is used here only as it is guaranteed to have last bits zero + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(V2EltOffset, MVT::i32)); + SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, + maskVT, Pointer); + + // Use shuffle mask in SHUFB synthetic instruction: + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, + ShufMaskOp); + } else if (rotate) { + if (rotamt < 0) + rotamt +=MaxElts; + rotamt *= EltVT.getSizeInBits()/8; + return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), + V1, DAG.getConstant(rotamt, MVT::i16)); + } else { + // Convert the SHUFFLE_VECTOR mask's input element units to the + // actual bytes. + unsigned BytesPerElement = EltVT.getSizeInBits()/8; + + SmallVector<SDValue, 16> ResultMask; + for (unsigned i = 0, e = MaxElts; i != e; ++i) { + unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); + + for (unsigned j = 0; j < BytesPerElement; ++j) + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); + } + SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, + &ResultMask[0], ResultMask.size()); + return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); + } +} + +static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { + SDValue Op0 = Op.getOperand(0); // Op0 = the scalar + DebugLoc dl = Op.getDebugLoc(); + + if (Op0.getNode()->getOpcode() == ISD::Constant) { + // For a constant, build the appropriate constant vector, which will + // eventually simplify to a vector register load. 
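    // For example (illustrative, not from the original comment): lowering
    // (scalar_to_vector (i16 7)) produces the v8i16 BUILD_VECTOR
    // <7,7,7,7,7,7,7,7>, which eventually becomes a single splatted constant
    // load (for instance ILH) instead of an element insertion.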
+ + ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode()); + SmallVector<SDValue, 16> ConstVecValues; + EVT VT; + size_t n_copies; + + // Create a constant vector: + switch (Op.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected constant value type in " + "LowerSCALAR_TO_VECTOR"); + case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; + case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; + case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; + case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; + case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; + case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; + } + + SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); + for (size_t j = 0; j < n_copies; ++j) + ConstVecValues.push_back(CValue); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), + &ConstVecValues[0], ConstVecValues.size()); + } else { + // Otherwise, copy the value from one register to another: + switch (Op0.getValueType().getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); + case MVT::i8: + case MVT::i16: + case MVT::i32: + case MVT::i64: + case MVT::f32: + case MVT::f64: + return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); + } + } +} + +static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + SDValue N = Op.getOperand(0); + SDValue Elt = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + SDValue retval; + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { + // Constant argument: + int EltNo = (int) C->getZExtValue(); + + // sanity checks: + if (VT == MVT::i8 && EltNo >= 16) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); + else if (VT == MVT::i16 && EltNo >= 8) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); + else if (VT == MVT::i32 && EltNo >= 4) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); + else if (VT == MVT::i64 && EltNo >= 2) + llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); + + if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { + // i32 and i64: Element 0 is the preferred slot + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); + } + + // Need to generate shuffle mask and extract: + int prefslot_begin = -1, prefslot_end = -1; + int elt_byte = EltNo * VT.getSizeInBits() / 8; + + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid value type!"); + case MVT::i8: { + prefslot_begin = prefslot_end = 3; + break; + } + case MVT::i16: { + prefslot_begin = 2; prefslot_end = 3; + break; + } + case MVT::i32: + case MVT::f32: { + prefslot_begin = 0; prefslot_end = 3; + break; + } + case MVT::i64: + case MVT::f64: { + prefslot_begin = 0; prefslot_end = 7; + break; + } + } + + assert(prefslot_begin != -1 && prefslot_end != -1 && + "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); + + unsigned int ShufBytes[16] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + for (int i = 0; i < 16; ++i) { + // zero fill uppper part of preferred slot, don't care about the + // other slots: + unsigned int mask_val; + if (i <= prefslot_end) { + mask_val = + ((i < prefslot_begin) + ? 
0x80 + : elt_byte + (i - prefslot_begin)); + + ShufBytes[i] = mask_val; + } else + ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; + } + + SDValue ShufMask[4]; + for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { + unsigned bidx = i * 4; + unsigned int bits = ((ShufBytes[bidx] << 24) | + (ShufBytes[bidx+1] << 16) | + (ShufBytes[bidx+2] << 8) | + ShufBytes[bidx+3]); + ShufMask[i] = DAG.getConstant(bits, MVT::i32); + } + + SDValue ShufMaskVec = + DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); + + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), + N, N, ShufMaskVec)); + } else { + // Variable index: Rotate the requested element into slot 0, then replicate + // slot 0 across the vector + EVT VecVT = N.getValueType(); + if (!VecVT.isSimple() || !VecVT.isVector()) { + report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" + "vector type!"); + } + + // Make life easier by making sure the index is zero-extended to i32 + if (Elt.getValueType() != MVT::i32) + Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); + + // Scale the index to a bit/byte shift quantity + APInt scaleFactor = + APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); + unsigned scaleShift = scaleFactor.logBase2(); + SDValue vecShift; + + if (scaleShift > 0) { + // Scale the shift factor: + Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, + DAG.getConstant(scaleShift, MVT::i32)); + } + + vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt); + + // Replicate the bytes starting at byte 0 across the entire vector (for + // consistency with the notion of a unified register set) + SDValue replicate; + + switch (VT.getSimpleVT().SimpleTy) { + default: + report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" + "type"); + /*NOTREACHED*/ + case MVT::i8: { + SDValue factor = DAG.getConstant(0x00000000, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); + break; + } + case MVT::i16: { + SDValue factor = DAG.getConstant(0x00010001, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); + break; + } + case MVT::i32: + case MVT::f32: { + SDValue factor = DAG.getConstant(0x00010203, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + factor, factor, factor, factor); + break; + } + case MVT::i64: + case MVT::f64: { + SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); + SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); + replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + loFactor, hiFactor, loFactor, hiFactor); + break; + } + } + + retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, + DAG.getNode(SPUISD::SHUFB, dl, VecVT, + vecShift, vecShift, replicate)); + } + + return retval; +} + +static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + SDValue VecOp = Op.getOperand(0); + SDValue ValOp = Op.getOperand(1); + SDValue IdxOp = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + EVT eltVT = ValOp.getValueType(); + + // use 0 when the lane to insert to is 'undef' + int64_t Offset=0; + if (IdxOp.getOpcode() != ISD::UNDEF) { + ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); + assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8; + } + + EVT PtrVT = 
DAG.getTargetLoweringInfo().getPointerTy(); + // Use $sp ($1) because it's always 16-byte aligned and it's available: + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(Offset, PtrVT)); + // widen the mask when dealing with half vectors + EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(), + 128/ VT.getVectorElementType().getSizeInBits()); + SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer); + + SDValue result = + DAG.getNode(SPUISD::SHUFB, dl, VT, + DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), + VecOp, + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask)); + + return result; +} + +static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, + const TargetLowering &TLI) +{ + SDValue N0 = Op.getOperand(0); // Everything has at least one operand + DebugLoc dl = Op.getDebugLoc(); + EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType()); + + assert(Op.getValueType() == MVT::i8); + switch (Opc) { + default: + llvm_unreachable("Unhandled i8 math operator"); + case ISD::ADD: { + // 8-bit addition: Promote the arguments up to 16-bits and truncate + // the result: + SDValue N1 = Op.getOperand(1); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + + } + + case ISD::SUB: { + // 8-bit subtraction: Promote the arguments up to 16-bits and truncate + // the result: + SDValue N1 = Op.getOperand(1); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + } + case ISD::ROTR: + case ISD::ROTL: { + SDValue N1 = Op.getOperand(1); + EVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) + ? 
ISD::ZERO_EXTEND + : ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + // Replicate lower 8-bits into upper 8: + SDValue ExpandArg = + DAG.getNode(ISD::OR, dl, MVT::i16, N0, + DAG.getNode(ISD::SHL, dl, MVT::i16, + N0, DAG.getConstant(8, MVT::i32))); + + // Truncate back down to i8 + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); + } + case ISD::SRL: + case ISD::SHL: { + SDValue N1 = Op.getOperand(1); + EVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::ZERO_EXTEND; + + if (N1.getValueType().bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + } + case ISD::SRA: { + SDValue N1 = Op.getOperand(1); + EVT N1VT = N1.getValueType(); + + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + if (!N1VT.bitsEq(ShiftVT)) { + unsigned N1Opc = ISD::SIGN_EXTEND; + + if (N1VT.bitsGT(ShiftVT)) + N1Opc = ISD::TRUNCATE; + N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); + } + + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + } + case ISD::MUL: { + SDValue N1 = Op.getOperand(1); + + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); + return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, + DAG.getNode(Opc, dl, MVT::i16, N0, N1)); + } + } +} + +//! Lower byte immediate operations for v16i8 vectors: +static SDValue +LowerByteImmed(SDValue Op, SelectionDAG &DAG) { + SDValue ConstVec; + SDValue Arg; + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + + ConstVec = Op.getOperand(0); + Arg = Op.getOperand(1); + if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { + if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { + ConstVec = ConstVec.getOperand(0); + } else { + ConstVec = Op.getOperand(1); + Arg = Op.getOperand(0); + if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) { + ConstVec = ConstVec.getOperand(0); + } + } + } + + if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { + BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode()); + assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); + + APInt APSplatBits, APSplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); + + if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, + HasAnyUndefs, minSplatBits) + && minSplatBits <= SplatBitSize) { + uint64_t SplatBits = APSplatBits.getZExtValue(); + SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); + + SmallVector<SDValue, 16> tcVec; + tcVec.assign(16, tc); + return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); + } + } + + // These operations (AND, OR, XOR) are legal, they just couldn't be custom + // lowered. Return the operation, rather than a null SDValue. + return Op; +} + +//! Custom lowering for CTPOP (count population) +/*! + Custom lowering code that counts the number ones in the input + operand. SPU has such an instruction, but it counts the number of + ones per byte, which then have to be accumulated. 
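  For example (an illustrative walk-through, not part of the original
  comment): for an i32 operand, CNTB leaves one per-byte count in each byte of
  the preferred word, and the MVT::i32 case below folds them with two
  shift-and-add steps,

      sum16 = (cnt >> 16) + cnt;     // pairs up the byte counts
      sum8  = (sum16 >> 8) + sum16;  // low byte now holds all four counts
      pop   = sum8 & 0xff;

  which is the Comp1/Sum1/Comp2/Sum2 sequence expressed on scalars.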
+*/ +static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + EVT vecVT = EVT::getVectorVT(*DAG.getContext(), + VT, (128 / VT.getSizeInBits())); + DebugLoc dl = Op.getDebugLoc(); + + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Invalid value type!"); + case MVT::i8: { + SDValue N = Op.getOperand(0); + SDValue Elt0 = DAG.getConstant(0, MVT::i32); + + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); + + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); + } + + case MVT::i16: { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); + + SDValue N = Op.getOperand(0); + SDValue Elt0 = DAG.getConstant(0, MVT::i16); + SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); + SDValue Shift1 = DAG.getConstant(8, MVT::i32); + + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); + + // CNTB_result becomes the chain to which all of the virtual registers + // CNTB_reg, SUM1_reg become associated: + SDValue CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); + + SDValue CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); + + SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); + + return DAG.getNode(ISD::AND, dl, MVT::i16, + DAG.getNode(ISD::ADD, dl, MVT::i16, + DAG.getNode(ISD::SRL, dl, MVT::i16, + Tmp1, Shift1), + Tmp1), + Mask0); + } + + case MVT::i32: { + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + + unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); + + SDValue N = Op.getOperand(0); + SDValue Elt0 = DAG.getConstant(0, MVT::i32); + SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); + SDValue Shift1 = DAG.getConstant(16, MVT::i32); + SDValue Shift2 = DAG.getConstant(8, MVT::i32); + + SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); + SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); + + // CNTB_result becomes the chain to which all of the virtual registers + // CNTB_reg, SUM1_reg become associated: + SDValue CNTB_result = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); + + SDValue CNTB_rescopy = + DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); + + SDValue Comp1 = + DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), + Shift1); + + SDValue Sum1 = + DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, + DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); + + SDValue Sum1_rescopy = + DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); + + SDValue Comp2 = + DAG.getNode(ISD::SRL, dl, MVT::i32, + DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), + Shift2); + SDValue Sum2 = + DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2, + DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32)); + + return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0); + } + + case MVT::i64: + break; + } + + return SDValue(); +} + +//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 +/*! + f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. + All conversions to i64 are expanded to a libcall. 
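  For example (illustrative): an (fp_to_sint f64 -> i32) node is rewritten
  into a call to the runtime routine obtained from RTLIB::getFPTOSINT
  (__fixdfsi in libgcc/compiler-rt), while f32 -> i32 is returned unchanged
  and selected normally.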
+ */ +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + const SPUTargetLowering &TLI) { + EVT OpVT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + EVT Op0VT = Op0.getValueType(); + + if ((OpVT == MVT::i32 && Op0VT == MVT::f64) + || OpVT == MVT::i64) { + // Convert f32 / f64 to i32 / i64 via libcall. + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::FP_TO_SINT) + ? RTLIB::getFPTOSINT(Op0VT, OpVT) + : RTLIB::getFPTOUINT(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } + + return Op; +} + +//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 +/*! + i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. + All conversions from i64 are expanded to a libcall. + */ +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, + const SPUTargetLowering &TLI) { + EVT OpVT = Op.getValueType(); + SDValue Op0 = Op.getOperand(0); + EVT Op0VT = Op0.getValueType(); + + if ((OpVT == MVT::f64 && Op0VT == MVT::i32) + || Op0VT == MVT::i64) { + // Convert i32, i64 to f64 via libcall: + RTLIB::Libcall LC = + (Op.getOpcode() == ISD::SINT_TO_FP) + ? RTLIB::getSINTTOFP(Op0VT, OpVT) + : RTLIB::getUINTTOFP(Op0VT, OpVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); + SDValue Dummy; + return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); + } + + return Op; +} + +//! Lower ISD::SETCC +/*! + This handles MVT::f64 (double floating point) condition lowering + */ +static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); + DebugLoc dl = Op.getDebugLoc(); + assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); + + SDValue lhs = Op.getOperand(0); + SDValue rhs = Op.getOperand(1); + EVT lhsVT = lhs.getValueType(); + assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); + + EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); + APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + EVT IntVT(MVT::i64); + + // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently + // selected to a NOP: + SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs); + SDValue lhsHi32 = + DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::SRL, dl, IntVT, + i64lhs, DAG.getConstant(32, MVT::i32))); + SDValue lhsHi32abs = + DAG.getNode(ISD::AND, dl, MVT::i32, + lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); + SDValue lhsLo32 = + DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); + + // SETO and SETUO only use the lhs operand: + if (CC->get() == ISD::SETO) { + // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of + // SETUO + APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); + return DAG.getNode(ISD::XOR, dl, ccResultVT, + DAG.getSetCC(dl, ccResultVT, + lhs, DAG.getConstantFP(0.0, lhsVT), + ISD::SETUO), + DAG.getConstant(ccResultAllOnes, ccResultVT)); + } else if (CC->get() == ISD::SETUO) { + // Evaluates to true if Op0 is [SQ]NaN + return DAG.getNode(ISD::AND, dl, ccResultVT, + DAG.getSetCC(dl, ccResultVT, + lhsHi32abs, + DAG.getConstant(0x7ff00000, MVT::i32), + ISD::SETGE), + DAG.getSetCC(dl, ccResultVT, + lhsLo32, + DAG.getConstant(0, MVT::i32), + ISD::SETGT)); + } + + SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs); + SDValue rhsHi32 = + DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::SRL, dl, IntVT, + 
i64rhs, DAG.getConstant(32, MVT::i32))); + + // If a value is negative, subtract from the sign magnitude constant: + SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); + + // Convert the sign-magnitude representation into 2's complement: + SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, + lhsHi32, DAG.getConstant(31, MVT::i32)); + SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); + SDValue lhsSelect = + DAG.getNode(ISD::SELECT, dl, IntVT, + lhsSelectMask, lhsSignMag2TC, i64lhs); + + SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, + rhsHi32, DAG.getConstant(31, MVT::i32)); + SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); + SDValue rhsSelect = + DAG.getNode(ISD::SELECT, dl, IntVT, + rhsSelectMask, rhsSignMag2TC, i64rhs); + + unsigned compareOp; + + switch (CC->get()) { + case ISD::SETOEQ: + case ISD::SETUEQ: + compareOp = ISD::SETEQ; break; + case ISD::SETOGT: + case ISD::SETUGT: + compareOp = ISD::SETGT; break; + case ISD::SETOGE: + case ISD::SETUGE: + compareOp = ISD::SETGE; break; + case ISD::SETOLT: + case ISD::SETULT: + compareOp = ISD::SETLT; break; + case ISD::SETOLE: + case ISD::SETULE: + compareOp = ISD::SETLE; break; + case ISD::SETUNE: + case ISD::SETONE: + compareOp = ISD::SETNE; break; + default: + report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); + } + + SDValue result = + DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, + (ISD::CondCode) compareOp); + + if ((CC->get() & 0x8) == 0) { + // Ordered comparison: + SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT, + lhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT, + rhs, DAG.getConstantFP(0.0, MVT::f64), + ISD::SETO); + SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN); + + result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result); + } + + return result; +} + +//! Lower ISD::SELECT_CC +/*! + ISD::SELECT_CC can (generally) be implemented directly on the SPU using the + SELB instruction. + + \note Need to revisit this in the future: if the code path through the true + and false value computations is longer than the latency of a branch (6 + cycles), then it would be more advantageous to branch and insert a new basic + block and branch on the condition. However, this code does not make that + assumption, given the simplisitc uses so far. + */ + +static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT = Op.getValueType(); + SDValue lhs = Op.getOperand(0); + SDValue rhs = Op.getOperand(1); + SDValue trueval = Op.getOperand(2); + SDValue falseval = Op.getOperand(3); + SDValue condition = Op.getOperand(4); + DebugLoc dl = Op.getDebugLoc(); + + // NOTE: SELB's arguments: $rA, $rB, $mask + // + // SELB selects bits from $rA where bits in $mask are 0, bits from $rB + // where bits in $mask are 1. CCond will be inverted, having 1s where the + // condition was true and 0s where the condition was false. Hence, the + // arguments to SELB get reversed. + + // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's + // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up + // with another "cannot select select_cc" assert: + + SDValue compare = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(Op.getValueType()), + lhs, rhs, condition); + return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare); +} + +//! 
Custom lower ISD::TRUNCATE +static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) +{ + // Type to truncate to + EVT VT = Op.getValueType(); + MVT simpleVT = VT.getSimpleVT(); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), + VT, (128 / VT.getSizeInBits())); + DebugLoc dl = Op.getDebugLoc(); + + // Type to truncate from + SDValue Op0 = Op.getOperand(0); + EVT Op0VT = Op0.getValueType(); + + if (Op0VT == MVT::i128 && simpleVT == MVT::i64) { + // Create shuffle mask, least significant doubleword of quadword + unsigned maskHigh = 0x08090a0b; + unsigned maskLow = 0x0c0d0e0f; + // Use a shuffle to perform the truncation + SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + DAG.getConstant(maskHigh, MVT::i32), + DAG.getConstant(maskLow, MVT::i32), + DAG.getConstant(maskHigh, MVT::i32), + DAG.getConstant(maskLow, MVT::i32)); + + SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, + Op0, Op0, shufMask); + + return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); + } + + return SDValue(); // Leave the truncate unmolested +} + +/*! + * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic + * algorithm is to duplicate the sign bit using rotmai to generate at + * least one byte full of sign bits. Then propagate the "sign-byte" into + * the leftmost words and the i64/i32 into the rightmost words using shufb. + * + * @param Op The sext operand + * @param DAG The current DAG + * @return The SDValue with the entire instruction sequence + */ +static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) +{ + DebugLoc dl = Op.getDebugLoc(); + + // Type to extend to + MVT OpVT = Op.getValueType().getSimpleVT(); + + // Type to extend from + SDValue Op0 = Op.getOperand(0); + MVT Op0VT = Op0.getValueType().getSimpleVT(); + + // extend i8 & i16 via i32 + if (Op0VT == MVT::i8 || Op0VT == MVT::i16) { + Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0); + Op0VT = MVT::i32; + } + + // The type to extend to needs to be a i128 and + // the type to extend from needs to be i64 or i32. + assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && + "LowerSIGN_EXTEND: input and/or output operand have wrong size"); + (void)OpVT; + + // Create shuffle mask + unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 + unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 + unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 + SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, + DAG.getConstant(mask1, MVT::i32), + DAG.getConstant(mask1, MVT::i32), + DAG.getConstant(mask2, MVT::i32), + DAG.getConstant(mask3, MVT::i32)); + + // Word wise arithmetic right shift to generate at least one byte + // that contains sign bits. + MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; + SDValue sraVal = DAG.getNode(ISD::SRA, + dl, + mvt, + DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), + DAG.getConstant(31, MVT::i32)); + + // reinterpret as a i128 (SHUFB requires it). This gets lowered away. + SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, + dl, Op0VT, Op0, + DAG.getTargetConstant( + SPU::GPRCRegClass.getID(), + MVT::i32)), 0); + // Shuffle bytes - Copy the sign bits into the upper 64 bits + // and the input value into the lower 64 bits. + SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, + extended, sraVal, shufMask); + return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle); +} + +//! Custom (target-specific) lowering entry point +/*! 
+ This is where LLVM's DAG selection process calls to do target-specific + lowering of nodes. + */ +SDValue +SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const +{ + unsigned Opc = (unsigned) Op.getOpcode(); + EVT VT = Op.getValueType(); + + switch (Opc) { + default: { +#ifndef NDEBUG + errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; + errs() << "Op.getOpcode() = " << Opc << "\n"; + errs() << "*Op.getNode():\n"; + Op.getNode()->dump(); +#endif + llvm_unreachable(0); + } + case ISD::LOAD: + case ISD::EXTLOAD: + case ISD::SEXTLOAD: + case ISD::ZEXTLOAD: + return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::STORE: + return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::ConstantPool: + return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); + case ISD::ConstantFP: + return LowerConstantFP(Op, DAG); + + // i8, i64 math ops: + case ISD::ADD: + case ISD::SUB: + case ISD::ROTR: + case ISD::ROTL: + case ISD::SRL: + case ISD::SHL: + case ISD::SRA: { + if (VT == MVT::i8) + return LowerI8Math(Op, DAG, Opc, *this); + break; + } + + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + return LowerFP_TO_INT(Op, DAG, *this); + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + return LowerINT_TO_FP(Op, DAG, *this); + + // Vector-related lowering. + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG); + case ISD::SCALAR_TO_VECTOR: + return LowerSCALAR_TO_VECTOR(Op, DAG); + case ISD::VECTOR_SHUFFLE: + return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: + return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: + return LowerINSERT_VECTOR_ELT(Op, DAG); + + // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return LowerByteImmed(Op, DAG); + + // Vector and i8 multiply: + case ISD::MUL: + if (VT == MVT::i8) + return LowerI8Math(Op, DAG, Opc, *this); + + case ISD::CTPOP: + return LowerCTPOP(Op, DAG); + + case ISD::SELECT_CC: + return LowerSELECT_CC(Op, DAG, *this); + + case ISD::SETCC: + return LowerSETCC(Op, DAG, *this); + + case ISD::TRUNCATE: + return LowerTRUNCATE(Op, DAG); + + case ISD::SIGN_EXTEND: + return LowerSIGN_EXTEND(Op, DAG); + } + + return SDValue(); +} + +void SPUTargetLowering::ReplaceNodeResults(SDNode *N, + SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const +{ +#if 0 + unsigned Opc = (unsigned) N->getOpcode(); + EVT OpVT = N->getValueType(0); + + switch (Opc) { + default: { + errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; + errs() << "Op.getOpcode() = " << Opc << "\n"; + errs() << "*Op.getNode():\n"; + N->dump(); + abort(); + /*NOTREACHED*/ + } + } +#endif + + /* Otherwise, return unchanged */ +} + +//===----------------------------------------------------------------------===// +// Target Optimization Hooks +//===----------------------------------------------------------------------===// + +SDValue +SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const +{ +#if 0 + TargetMachine &TM = getTargetMachine(); +#endif + const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); + SelectionDAG &DAG = DCI.DAG; + SDValue Op0 = N->getOperand(0); // everything has at least one operand + EVT NodeVT = N->getValueType(0); // The node's value type + EVT Op0VT = Op0.getValueType(); // The first operand's 
result + SDValue Result; // Initially, empty result + DebugLoc dl = N->getDebugLoc(); + + switch (N->getOpcode()) { + default: break; + case ISD::ADD: { + SDValue Op1 = N->getOperand(1); + + if (Op0.getOpcode() == SPUISD::IndirectAddr + || Op1.getOpcode() == SPUISD::IndirectAddr) { + // Normalize the operands to reduce repeated code + SDValue IndirectArg = Op0, AddArg = Op1; + + if (Op1.getOpcode() == SPUISD::IndirectAddr) { + IndirectArg = Op1; + AddArg = Op0; + } + + if (isa<ConstantSDNode>(AddArg)) { + ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); + SDValue IndOp1 = IndirectArg.getOperand(1); + + if (CN0->isNullValue()) { + // (add (SPUindirect <arg>, <arg>), 0) -> + // (SPUindirect <arg>, <arg>) + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\n" + << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" + << "With: (SPUindirect <arg>, <arg>)\n"; + } +#endif + + return IndirectArg; + } else if (isa<ConstantSDNode>(IndOp1)) { + // (add (SPUindirect <arg>, <const>), <const>) -> + // (SPUindirect <arg>, <const + const>) + ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1); + int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); + SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\n" + << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() + << "), " << CN0->getSExtValue() << ")\n" + << "With: (SPUindirect <arg>, " + << combinedConst << ")\n"; + } +#endif + + return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, + IndirectArg, combinedValue); + } + } + } + break; + } + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: { + if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { + // (any_extend (SPUextract_elt0 <arg>)) -> + // (SPUextract_elt0 <arg>) + // Types must match, however... 
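      // e.g. (i32 (any_extend (i32 (SPUextract_elt0 v4i32:$x)))) is a no-op
      // wrapper and folds to the extract itself; an extend to a wider type
      // cannot be folded this way, since it would still have to produce the
      // new high bits. (Illustrative example, not from the original comment.)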
+#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\nReplace: "; + N->dump(&DAG); + errs() << "\nWith: "; + Op0.getNode()->dump(&DAG); + errs() << "\n"; + } +#endif + + return Op0; + } + break; + } + case SPUISD::IndirectAddr: { + if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (CN != 0 && CN->isNullValue()) { + // (SPUindirect (SPUaform <addr>, 0), 0) -> + // (SPUaform <addr>, 0) + + DEBUG(errs() << "Replace: "); + DEBUG(N->dump(&DAG)); + DEBUG(errs() << "\nWith: "); + DEBUG(Op0.getNode()->dump(&DAG)); + DEBUG(errs() << "\n"); + + return Op0; + } + } else if (Op0.getOpcode() == ISD::ADD) { + SDValue Op1 = N->getOperand(1); + if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { + // (SPUindirect (add <arg>, <arg>), 0) -> + // (SPUindirect <arg>, <arg>) + if (CN1->isNullValue()) { + +#if !defined(NDEBUG) + if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { + errs() << "\n" + << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" + << "With: (SPUindirect <arg>, <arg>)\n"; + } +#endif + + return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, + Op0.getOperand(0), Op0.getOperand(1)); + } + } + } + break; + } + case SPUISD::SHL_BITS: + case SPUISD::SHL_BYTES: + case SPUISD::ROTBYTES_LEFT: { + SDValue Op1 = N->getOperand(1); + + // Kill degenerate vector shifts: + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { + if (CN->isNullValue()) { + Result = Op0; + } + } + break; + } + case SPUISD::PREFSLOT2VEC: { + switch (Op0.getOpcode()) { + default: + break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: { + // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> + // <arg> + // but only if the SPUprefslot2vec and <arg> types match. + SDValue Op00 = Op0.getOperand(0); + if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { + SDValue Op000 = Op00.getOperand(0); + if (Op000.getValueType() == NodeVT) { + Result = Op000; + } + } + break; + } + case SPUISD::VEC2PREFSLOT: { + // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> + // <arg> + Result = Op0.getOperand(0); + break; + } + } + break; + } + } + + // Otherwise, return unchanged. +#ifndef NDEBUG + if (Result.getNode()) { + DEBUG(errs() << "\nReplace.SPU: "); + DEBUG(N->dump(&DAG)); + DEBUG(errs() << "\nWith: "); + DEBUG(Result.getNode()->dump(&DAG)); + DEBUG(errs() << "\n"); + } +#endif + + return Result; +} + +//===----------------------------------------------------------------------===// +// Inline Assembly Support +//===----------------------------------------------------------------------===// + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +SPUTargetLowering::ConstraintType +SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { + if (ConstraintLetter.size() == 1) { + switch (ConstraintLetter[0]) { + default: break; + case 'b': + case 'r': + case 'f': + case 'v': + case 'y': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(ConstraintLetter); +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. 
+TargetLowering::ConstraintWeight +SPUTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + default: + weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); + break; + //FIXME: Seems like the supported constraint letters were just copied + // from PPC, as the following doesn't correspond to the GCC docs. + // I'm leaving it so until someone adds the corresponding lowering support. + case 'b': + case 'r': + case 'f': + case 'd': + case 'v': + case 'y': + weight = CW_Register; + break; + } + return weight; +} + +std::pair<unsigned, const TargetRegisterClass*> +SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const +{ + if (Constraint.size() == 1) { + // GCC RS6000 Constraint Letters + switch (Constraint[0]) { + case 'b': // R1-R31 + case 'r': // R0-R31 + if (VT == MVT::i64) + return std::make_pair(0U, SPU::R64CRegisterClass); + return std::make_pair(0U, SPU::R32CRegisterClass); + case 'f': + if (VT == MVT::f32) + return std::make_pair(0U, SPU::R32FPRegisterClass); + else if (VT == MVT::f64) + return std::make_pair(0U, SPU::R64FPRegisterClass); + break; + case 'v': + return std::make_pair(0U, SPU::GPRCRegisterClass); + } + } + + return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); +} + +//! Compute used/known bits for a SPU operand +void +SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth ) const { +#if 0 + const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; + + switch (Op.getOpcode()) { + default: + // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + break; + case CALL: + case SHUFB: + case SHUFFLE_MASK: + case CNTB: + case SPUISD::PREFSLOT2VEC: + case SPUISD::LDRESULT: + case SPUISD::VEC2PREFSLOT: + case SPUISD::SHLQUAD_L_BITS: + case SPUISD::SHLQUAD_L_BYTES: + case SPUISD::VEC_ROTL: + case SPUISD::VEC_ROTR: + case SPUISD::ROTBYTES_LEFT: + case SPUISD::SELECT_MASK: + case SPUISD::SELB: + } +#endif +} + +unsigned +SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth) const { + switch (Op.getOpcode()) { + default: + return 1; + + case ISD::SETCC: { + EVT VT = Op.getValueType(); + + if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { + VT = MVT::i32; + } + return VT.getSizeInBits(); + } + } +} + +// LowerAsmOperandForConstraint +void +SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + // Default, for the time being, to the base class handler + TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode. +bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, + Type *Ty) const { + // SPU's addresses are 256K: + return (V > -(1 << 18) && V < (1 << 18) - 1); +} + +bool SPUTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const { + return false; +} + +bool +SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // The SPU target isn't yet aware of offsets. 
+ return false; +} + +// can we compare to Imm without writing it into a register? +bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + //ceqi, cgti, etc. all take s10 operand + return isInt<10>(Imm); +} + +bool +SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM, + Type * ) const{ + + // A-form: 18bit absolute address. + if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0) + return true; + + // D-form: reg + 14bit offset + if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs)) + return true; + + // X-form: reg+reg + if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0) + return true; + + return false; +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h new file mode 100644 index 000000000000..e3db7b2f1fbc --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUISelLowering.h @@ -0,0 +1,185 @@ +//===-- SPUISelLowering.h - Cell SPU DAG Lowering Interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interfaces that Cell SPU uses to lower LLVM code into +// a selection DAG. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_ISELLOWERING_H +#define SPU_ISELLOWERING_H + +#include "SPU.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + namespace SPUISD { + enum NodeType { + // Start the numbering where the builting ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + // Pseudo instructions: + RET_FLAG, ///< Return with flag, matched by bi instruction + + Hi, ///< High address component (upper 16) + Lo, ///< Low address component (lower 16) + PCRelAddr, ///< Program counter relative address + AFormAddr, ///< A-form address (local store) + IndirectAddr, ///< D-Form "imm($r)" and X-form "$r($r)" + + LDRESULT, ///< Load result (value, chain) + CALL, ///< CALL instruction + SHUFB, ///< Vector shuffle (permute) + SHUFFLE_MASK, ///< Shuffle mask + CNTB, ///< Count leading ones in bytes + PREFSLOT2VEC, ///< Promote scalar->vector + VEC2PREFSLOT, ///< Extract element 0 + SHL_BITS, ///< Shift quad left, by bits + SHL_BYTES, ///< Shift quad left, by bytes + SRL_BYTES, ///< Shift quad right, by bytes. Insert zeros. + VEC_ROTL, ///< Vector rotate left + VEC_ROTR, ///< Vector rotate right + ROTBYTES_LEFT, ///< Rotate bytes (loads -> ROTQBYI) + ROTBYTES_LEFT_BITS, ///< Rotate bytes left by bit shift count + SELECT_MASK, ///< Select Mask (FSM, FSMB, FSMH, FSMBI) + SELB, ///< Select bits -> (b & mask) | (a & ~mask) + // Markers: These aren't used to generate target-dependent nodes, but + // are used during instruction selection. + ADD64_MARKER, ///< i64 addition marker + SUB64_MARKER, ///< i64 subtraction marker + MUL64_MARKER, ///< i64 multiply marker + LAST_SPUISD ///< Last user-defined instruction + }; + } + + //! 
Utility functions specific to CellSPU: + namespace SPU { + SDValue get_vec_u18imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType); + SDValue get_vec_i16imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType); + SDValue get_vec_i10imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType); + SDValue get_vec_i8imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType); + SDValue get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, + EVT ValueType); + SDValue get_v4i32_imm(SDNode *N, SelectionDAG &DAG); + SDValue get_v2i64_imm(SDNode *N, SelectionDAG &DAG); + + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG, + const SPUTargetMachine &TM); + //! Simplify a EVT::v2i64 constant splat to CellSPU-ready form + SDValue LowerV2I64Splat(EVT OpVT, SelectionDAG &DAG, uint64_t splat, + DebugLoc dl); + } + + class SPUTargetMachine; // forward dec'l. + + class SPUTargetLowering : + public TargetLowering + { + int VarArgsFrameIndex; // FrameIndex for start of varargs area. + SPUTargetMachine &SPUTM; + + public: + //! The venerable constructor + /*! + This is where the CellSPU backend sets operation handling (i.e., legal, + custom, expand or promote.) + */ + SPUTargetLowering(SPUTargetMachine &TM); + + //! Get the target machine + SPUTargetMachine &getSPUTargetMachine() { + return SPUTM; + } + + /// getTargetNodeName() - This method returns the name of a target specific + /// DAG node. + virtual const char *getTargetNodeName(unsigned Opcode) const; + + /// getSetCCResultType - Return the ValueType for ISD::SETCC + virtual EVT getSetCCResultType(EVT VT) const; + + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + + //! Custom lowering hooks + virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + + //! Custom lowering hook for nodes with illegal result types. + virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, + SelectionDAG &DAG) const; + + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + + virtual void computeMaskedBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth = 0) const; + + virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, + unsigned Depth = 0) const; + + ConstraintType getConstraintType(const std::string &ConstraintLetter) const; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. + ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const; + + std::pair<unsigned, const TargetRegisterClass*> + getRegForInlineAsmConstraint(const std::string &Constraint, + EVT VT) const; + + void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const; + + /// isLegalAddressImmediate - Return true if the integer value can be used + /// as the offset of the target addressing mode. 
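    /// For example (illustrative, based on the 256K local-store range check
    /// in the .cpp file): an offset of 0x1ffff is accepted, 0x40000 is not.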
+ virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const; + virtual bool isLegalAddressImmediate(GlobalValue *) const; + + virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; + + virtual SDValue + LowerFormalArguments(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue + LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool doesNotRet, bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const; + + virtual SDValue + LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + DebugLoc dl, SelectionDAG &DAG) const; + + virtual bool isLegalICmpImmediate(int64_t Imm) const; + + virtual bool isLegalAddressingMode(const AddrMode &AM, + Type *Ty) const; + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h new file mode 100644 index 000000000000..b495537fc2c8 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrBuilder.h @@ -0,0 +1,43 @@ +//===-- SPUInstrBuilder.h - Aides for building Cell SPU insts ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes functions that may be used with BuildMI from the +// MachineInstrBuilder.h file to simplify generating frame and constant pool +// references. +// +// For reference, the order of operands for memory references is: +// (Operand), Dest Reg, Base Reg, and either Reg Index or Immediate +// Displacement. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_INSTRBUILDER_H +#define SPU_INSTRBUILDER_H + +#include "llvm/CodeGen/MachineInstrBuilder.h" + +namespace llvm { + +/// addFrameReference - This function is used to add a reference to the base of +/// an abstract object on the stack frame of the current function. This +/// reference has base register as the FrameIndex offset until it is resolved. +/// This allows a constant offset to be specified as well... +/// +inline const MachineInstrBuilder& +addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0, + bool mem = true) { + if (mem) + return MIB.addImm(Offset).addFrameIndex(FI); + else + return MIB.addFrameIndex(FI).addImm(Offset); +} + +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td new file mode 100644 index 000000000000..cd3f42214345 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrFormats.td @@ -0,0 +1,320 @@ +//===-- SPUInstrFormats.td - Cell SPU Instruction Formats --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// +// Cell SPU instruction formats. Note that these are notationally similar to +// PowerPC, like "A-Form". But the sizes of operands and fields differ. + +// This was kiped from the PPC instruction formats (seemed like a good idea...) + +class SPUInstr<dag OOL, dag IOL, string asmstr, InstrItinClass itin> + : Instruction { + field bits<32> Inst; + + let Namespace = "SPU"; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; +} + +// RR Format +class RRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, itin> { + bits<7> RA; + bits<7> RB; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-10} = opcode; + let Inst{11-17} = RB; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +let RB = 0 in { + // RR Format, where RB is zeroed (dont care): + class RRForm_1<bits<11> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : RRForm<opcode, OOL, IOL, asmstr, itin, pattern> + { } + + let RA = 0 in { + // RR Format, where RA and RB are zeroed (dont care): + // Used for reads from status control registers (see FPSCRRr32) + class RRForm_2<bits<11> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : RRForm<opcode, OOL, IOL, asmstr, itin, pattern> + { } + } +} + +let RT = 0 in { + // RR Format, where RT is zeroed (don't care), or as the instruction handbook + // says, "RT is a false target." Used in "Halt if" instructions + class RRForm_3<bits<11> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : RRForm<opcode, OOL, IOL, asmstr, itin, pattern> + { } +} + +// RRR Format +class RRRForm<bits<4> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, itin> +{ + bits<7> RA; + bits<7> RB; + bits<7> RC; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-3} = opcode; + let Inst{4-10} = RT; + let Inst{11-17} = RB; + let Inst{18-24} = RA; + let Inst{25-31} = RC; +} + +// RI7 Format +class RI7Form<bits<11> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, itin> +{ + bits<7> i7; + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-10} = opcode; + let Inst{11-17} = i7; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +// CVTIntFp Format +class CVTIntFPForm<bits<10> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, itin> +{ + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-9} = opcode; + let Inst{10-17} = 0; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +let RA = 0 in { + class BICondForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, list<dag> pattern> + : RRForm<opcode, OOL, IOL, asmstr, BranchResolv, pattern> + { } + + let RT = 0 in { + // Branch instruction format (without D/E flag settings) + class BRForm<bits<11> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : RRForm<opcode, OOL, IOL, asmstr, itin, pattern> + { } + + class BIForm<bits<11> opcode, string asmstr, list<dag> pattern> + : RRForm<opcode, (outs), (ins R32C:$func), asmstr, BranchResolv, + pattern> + { } + + let RB = 0 in { + // Return instruction (bi, branch indirect), RA 
is zero (LR): + class RETForm<string asmstr, list<dag> pattern> + : BRForm<0b00010101100, (outs), (ins), asmstr, BranchResolv, + pattern> + { } + } + } +} + +// Branch indirect external data forms: +class BISLEDForm<bits<2> DE_flag, string asmstr, list<dag> pattern> + : SPUInstr<(outs), (ins indcalltarget:$func), asmstr, BranchResolv> +{ + bits<7> Rcalldest; + + let Pattern = pattern; + + let Inst{0-10} = 0b11010101100; + let Inst{11} = 0; + let Inst{12-13} = DE_flag; + let Inst{14-17} = 0b0000; + let Inst{18-24} = Rcalldest; + let Inst{25-31} = 0b0000000; +} + +// RI10 Format +class RI10Form<bits<8> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, itin> +{ + bits<10> i10; + bits<7> RA; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-7} = opcode; + let Inst{8-17} = i10; + let Inst{18-24} = RA; + let Inst{25-31} = RT; +} + +// RI10 Format, where the constant is zero (or effectively ignored by the +// SPU) +let i10 = 0 in { + class RI10Form_1<bits<8> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern> + { } +} + +// RI10 Format, where RT is ignored. +// This format is used primarily by the Halt If ... Immediate set of +// instructions +let RT = 0 in { + class RI10Form_2<bits<8> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : RI10Form<opcode, OOL, IOL, asmstr, itin, pattern> + { } +} + +// RI16 Format +class RI16Form<bits<9> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, itin> +{ + bits<16> i16; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-8} = opcode; + let Inst{9-24} = i16; + let Inst{25-31} = RT; +} + +// Specialized version of the RI16 Format for unconditional branch relative and +// branch absolute, branch and set link. Note that for branch and set link, the +// link register doesn't have to be $lr, but this is actually hard coded into +// the instruction pattern. 
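As an aside, the RI16 layout just defined can be summarized with a minimal C++ sketch (illustrative only, not part of the backend; encodeRI16 is a hypothetical helper), assuming bit 0 is the most-significant bit of the 32-bit word as the Inst{0-8}/Inst{9-24}/Inst{25-31} ranges imply:

#include <cstdint>

// Packs the RI16 fields exactly as the Inst{...} assignments above lay them
// out: 9-bit opcode in the most-significant bits, then the 16-bit immediate,
// then the 7-bit RT field in the least-significant bits.
static uint32_t encodeRI16(uint32_t Opcode9, uint32_t Imm16, uint32_t RT7) {
  uint32_t Inst = 0;
  Inst |= (Opcode9 & 0x1FFu) << 23;  // Inst{0-8}
  Inst |= (Imm16 & 0xFFFFu) << 7;    // Inst{9-24}
  Inst |= (RT7 & 0x7Fu);             // Inst{25-31}
  return Inst;
}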
+ +let RT = 0 in { + class UncondBranch<bits<9> opcode, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern> + { } + + class BranchSetLink<bits<9> opcode, dag OOL, dag IOL, string asmstr, + list<dag> pattern> + : RI16Form<opcode, OOL, IOL, asmstr, BranchResolv, pattern> + { } +} + +//===----------------------------------------------------------------------===// +// Specialized versions of RI16: +//===----------------------------------------------------------------------===// + +// RI18 Format +class RI18Form<bits<7> opcode, dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, itin> +{ + bits<18> i18; + bits<7> RT; + + let Pattern = pattern; + + let Inst{0-6} = opcode; + let Inst{7-24} = i18; + let Inst{25-31} = RT; +} + +//===----------------------------------------------------------------------===// +// Instruction formats for intrinsics: +//===----------------------------------------------------------------------===// + +// RI10 Format for v8i16 intrinsics +class RI10_Int_v8i16<bits<8> opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA), + !strconcat(opc, " $rT, $rA, $val"), itin, + [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA), + i16ImmSExt10:$val))] >; + +class RI10_Int_v4i32<bits<8> opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RI10Form<opcode, (outs VECREG:$rT), (ins s10imm:$val, VECREG:$rA), + !strconcat(opc, " $rT, $rA, $val"), itin, + [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA), + i32ImmSExt10:$val))] >; + +// RR Format for v8i16 intrinsics +class RR_Int_v8i16<bits<11> opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + !strconcat(opc, " $rT, $rA, $rB"), itin, + [(set (v8i16 VECREG:$rT), (IntID (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))] >; + +// RR Format for v4i32 intrinsics +class RR_Int_v4i32<bits<11> opcode, string opc, InstrItinClass itin, + Intrinsic IntID> : + RRForm<opcode, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + !strconcat(opc, " $rT, $rA, $rB"), itin, + [(set (v4i32 VECREG:$rT), (IntID (v4i32 VECREG:$rA), + (v4i32 VECREG:$rB)))] >; + +//===----------------------------------------------------------------------===// +// Pseudo instructions, like call frames: +//===----------------------------------------------------------------------===// + +class Pseudo<dag OOL, dag IOL, string asmstr, list<dag> pattern> + : SPUInstr<OOL, IOL, asmstr, NoItinerary> { + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Pattern = pattern; + let Inst{31-0} = 0; +} + +//===----------------------------------------------------------------------===// +// Branch hint formats +//===----------------------------------------------------------------------===// +// For hbrr and hbra +class HBI16Form<bits<7> opcode, dag IOL, string asmstr> + : Instruction { + field bits<32> Inst; + bits<16>i16; + bits<9>RO; + + let Namespace = "SPU"; + let InOperandList = IOL; + let OutOperandList = (outs); //no output + let AsmString = asmstr; + let Itinerary = BranchHints; + + let Inst{0-6} = opcode; + let Inst{7-8} = RO{8-7}; + let Inst{9-24} = i16; + let Inst{25-31} = RO{6-0}; +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp new file mode 100644 index 000000000000..759923d7bb42 --- /dev/null +++ 
b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -0,0 +1,453 @@ +//===-- SPUInstrInfo.cpp - Cell SPU Instruction Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Cell SPU implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#include "SPUInstrInfo.h" +#include "SPUInstrBuilder.h" +#include "SPUTargetMachine.h" +#include "SPUHazardRecognizers.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" + +#define GET_INSTRINFO_CTOR +#include "SPUGenInstrInfo.inc" + +using namespace llvm; + +namespace { + //! Predicate for an unconditional branch instruction + inline bool isUncondBranch(const MachineInstr *I) { + unsigned opc = I->getOpcode(); + + return (opc == SPU::BR + || opc == SPU::BRA + || opc == SPU::BI); + } + + //! Predicate for a conditional branch instruction + inline bool isCondBranch(const MachineInstr *I) { + unsigned opc = I->getOpcode(); + + return (opc == SPU::BRNZr32 + || opc == SPU::BRNZv4i32 + || opc == SPU::BRZr32 + || opc == SPU::BRZv4i32 + || opc == SPU::BRHNZr16 + || opc == SPU::BRHNZv8i16 + || opc == SPU::BRHZr16 + || opc == SPU::BRHZv8i16); + } +} + +SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm) + : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP), + TM(tm), + RI(*TM.getSubtargetImpl(), *this) +{ /* NOP */ } + +/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for +/// this target when scheduling the DAG. 
+ScheduleHazardRecognizer *SPUInstrInfo::CreateTargetHazardRecognizer( + const TargetMachine *TM, + const ScheduleDAG *DAG) const { + const TargetInstrInfo *TII = TM->getInstrInfo(); + assert(TII && "No InstrInfo?"); + return new SPUHazardRecognizer(*TII); +} + +unsigned +SPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case SPU::LQDv16i8: + case SPU::LQDv8i16: + case SPU::LQDv4i32: + case SPU::LQDv4f32: + case SPU::LQDv2f64: + case SPU::LQDr128: + case SPU::LQDr64: + case SPU::LQDr32: + case SPU::LQDr16: { + const MachineOperand MOp1 = MI->getOperand(1); + const MachineOperand MOp2 = MI->getOperand(2); + if (MOp1.isImm() && MOp2.isFI()) { + FrameIndex = MOp2.getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + } + return 0; +} + +unsigned +SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case SPU::STQDv16i8: + case SPU::STQDv8i16: + case SPU::STQDv4i32: + case SPU::STQDv4f32: + case SPU::STQDv2f64: + case SPU::STQDr128: + case SPU::STQDr64: + case SPU::STQDr32: + case SPU::STQDr16: + case SPU::STQDr8: { + const MachineOperand MOp1 = MI->getOperand(1); + const MachineOperand MOp2 = MI->getOperand(2); + if (MOp1.isImm() && MOp2.isFI()) { + FrameIndex = MOp2.getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + } + return 0; +} + +void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const +{ + // We support cross register class moves for our aliases, such as R3 in any + // reg class to any other reg class containing R3. This is required because + // we instruction select bitconvert i64 -> f64 as a noop for example, so our + // types have no specific meaning. + + BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); +} + +void +SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned SrcReg, bool isKill, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const +{ + unsigned opc; + bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); + if (RC == SPU::GPRCRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr128 : SPU::STQXr128); + } else if (RC == SPU::R64CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64); + } else if (RC == SPU::R64FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr64 : SPU::STQXr64); + } else if (RC == SPU::R32CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32); + } else if (RC == SPU::R32FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr32 : SPU::STQXr32); + } else if (RC == SPU::R16CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr16 : SPU::STQXr16); + } else if (RC == SPU::R8CRegisterClass) { + opc = (isValidFrameIdx ? SPU::STQDr8 : SPU::STQXr8); + } else if (RC == SPU::VECREGRegisterClass) { + opc = (isValidFrameIdx) ? 
SPU::STQDv16i8 : SPU::STQXv16i8; + } else { + llvm_unreachable("Unknown regclass!"); + } + + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(opc)) + .addReg(SrcReg, getKillRegState(isKill)), FrameIdx); +} + +void +SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, int FrameIdx, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const +{ + unsigned opc; + bool isValidFrameIdx = (FrameIdx < SPUFrameLowering::maxFrameOffset()); + if (RC == SPU::GPRCRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr128 : SPU::LQXr128); + } else if (RC == SPU::R64CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64); + } else if (RC == SPU::R64FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr64 : SPU::LQXr64); + } else if (RC == SPU::R32CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32); + } else if (RC == SPU::R32FPRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr32 : SPU::LQXr32); + } else if (RC == SPU::R16CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr16 : SPU::LQXr16); + } else if (RC == SPU::R8CRegisterClass) { + opc = (isValidFrameIdx ? SPU::LQDr8 : SPU::LQXr8); + } else if (RC == SPU::VECREGRegisterClass) { + opc = (isValidFrameIdx) ? SPU::LQDv16i8 : SPU::LQXv16i8; + } else { + llvm_unreachable("Unknown regclass in loadRegFromStackSlot!"); + } + + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); +} + +//! Branch analysis +/*! + \note This code was kiped from PPC. There may be more branch analysis for + CellSPU than what's currently done here. + */ +bool +SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (isUncondBranch(LastInst)) { + // Check for jump tables + if (!LastInst->getOperand(0).isMBB()) + return true; + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else if (isCondBranch(LastInst)) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(1).getMBB(); + DEBUG(errs() << "Pushing LastInst: "); + DEBUG(LastInst->dump()); + Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); + Cond.push_back(LastInst->getOperand(0)); + return false; + } + // Otherwise, don't know what this is. + return true; + } + + // Get the instruction before it if it's a terminator. + MachineInstr *SecondLastInst = I; + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && + isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with a conditional and unconditional branch, handle it. 
+ if (isCondBranch(SecondLastInst) && isUncondBranch(LastInst)) { + TBB = SecondLastInst->getOperand(1).getMBB(); + DEBUG(errs() << "Pushing SecondLastInst: "); + DEBUG(SecondLastInst->dump()); + Cond.push_back(MachineOperand::CreateImm(SecondLastInst->getOpcode())); + Cond.push_back(SecondLastInst->getOperand(0)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if (isUncondBranch(SecondLastInst) && isUncondBranch(LastInst)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // Otherwise, can't handle this. + return true; +} + +// search MBB for branch hint labels and branch hit ops +static void removeHBR( MachineBasicBlock &MBB) { + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I){ + if (I->getOpcode() == SPU::HBRA || + I->getOpcode() == SPU::HBR_LABEL){ + I=MBB.erase(I); + if (I == MBB.end()) + break; + } + } +} + +unsigned +SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator I = MBB.end(); + removeHBR(MBB); + if (I == MBB.begin()) + return 0; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return 0; + --I; + } + if (!isCondBranch(I) && !isUncondBranch(I)) + return 0; + + // Remove the first branch. + DEBUG(errs() << "Removing branch: "); + DEBUG(I->dump()); + I->eraseFromParent(); + I = MBB.end(); + if (I == MBB.begin()) + return 1; + + --I; + if (!(isCondBranch(I) || isUncondBranch(I))) + return 1; + + // Remove the second branch. + DEBUG(errs() << "Removing second branch: "); + DEBUG(I->dump()); + I->eraseFromParent(); + return 2; +} + +/** Find the optimal position for a hint branch instruction in a basic block. + * This should take into account: + * -the branch hint delays + * -congestion of the memory bus + * -dual-issue scheduling (i.e. avoid insertion of nops) + * Current implementation is rather simplistic. + */ +static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB) +{ + MachineBasicBlock::iterator J = MBB.end(); + for( int i=0; i<8; i++) { + if( J == MBB.begin() ) return J; + J--; + } + return J; +} + +unsigned +SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { + // Shouldn't be a fall through. + assert(TBB && "InsertBranch must not be told to insert a fallthrough"); + assert((Cond.size() == 2 || Cond.size() == 0) && + "SPU branch conditions have two components!"); + + MachineInstrBuilder MIB; + //TODO: make a more accurate algorithm. + bool haveHBR = MBB.size()>8; + + removeHBR(MBB); + MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol(); + // Add a label just before the branch + if (haveHBR) + MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel); + + // One-way branch. 
+ if (FBB == 0) { + if (Cond.empty()) { + // Unconditional branch + MIB = BuildMI(&MBB, DL, get(SPU::BR)); + MIB.addMBB(TBB); + + DEBUG(errs() << "Inserted one-way uncond branch: "); + DEBUG((*MIB).dump()); + + // basic blocks have just one branch so it is safe to add the hint a its + if (haveHBR) { + MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(TBB); + } + } else { + // Conditional branch + MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MIB.addReg(Cond[1].getReg()).addMBB(TBB); + + if (haveHBR) { + MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(TBB); + } + + DEBUG(errs() << "Inserted one-way cond branch: "); + DEBUG((*MIB).dump()); + } + return 1; + } else { + MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); + + // Two-way Conditional Branch. + MIB.addReg(Cond[1].getReg()).addMBB(TBB); + MIB2.addMBB(FBB); + + if (haveHBR) { + MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(FBB); + } + + DEBUG(errs() << "Inserted conditional branch: "); + DEBUG((*MIB).dump()); + DEBUG(errs() << "part 2: "); + DEBUG((*MIB2).dump()); + return 2; + } +} + +//! Reverses a branch's condition, returning false on success. +bool +SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) + const { + // Pretty brainless way of inverting the condition, but it works, considering + // there are only two conditions... + static struct { + unsigned Opc; //! The incoming opcode + unsigned RevCondOpc; //! The reversed condition opcode + } revconds[] = { + { SPU::BRNZr32, SPU::BRZr32 }, + { SPU::BRNZv4i32, SPU::BRZv4i32 }, + { SPU::BRZr32, SPU::BRNZr32 }, + { SPU::BRZv4i32, SPU::BRNZv4i32 }, + { SPU::BRHNZr16, SPU::BRHZr16 }, + { SPU::BRHNZv8i16, SPU::BRHZv8i16 }, + { SPU::BRHZr16, SPU::BRHNZr16 }, + { SPU::BRHZv8i16, SPU::BRHNZv8i16 } + }; + + unsigned Opc = unsigned(Cond[0].getImm()); + // Pretty dull mapping between the two conditions that SPU can generate: + for (int i = sizeof(revconds)/sizeof(revconds[0]) - 1; i >= 0; --i) { + if (revconds[i].Opc == Opc) { + Cond[0].setImm(revconds[i].RevCondOpc); + return false; + } + } + + return true; +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h new file mode 100644 index 000000000000..85e5821aefa1 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.h @@ -0,0 +1,84 @@ +//===-- SPUInstrInfo.h - Cell SPU Instruction Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the CellSPU implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_INSTRUCTIONINFO_H +#define SPU_INSTRUCTIONINFO_H + +#include "SPU.h" +#include "SPURegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +#define GET_INSTRINFO_HEADER +#include "SPUGenInstrInfo.inc" + +namespace llvm { + //! 
Cell SPU instruction information class + class SPUInstrInfo : public SPUGenInstrInfo { + SPUTargetMachine &TM; + const SPURegisterInfo RI; + public: + explicit SPUInstrInfo(SPUTargetMachine &tm); + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + virtual const SPURegisterInfo &getRegisterInfo() const { return RI; } + + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + //! Store a register to a stack slot, based on its register class. + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + //! Load a register from a stack slot, based on its register class. + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + //! Reverses a branch's condition, returning false on success. + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify) const; + + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + }; +} + +#endif diff --git a/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td new file mode 100644 index 000000000000..f76ebd75bfef --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUInstrInfo.td @@ -0,0 +1,4484 @@ +//==- SPUInstrInfo.td - Describe the Cell SPU Instructions -*- tablegen -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Cell SPU Instructions: +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// TODO Items (not urgent today, but would be nice, low priority) +// +// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by +// concatenating the byte argument b as "bbbb". Could recognize this bit pattern +// in 16-bit and 32-bit constants and reduce instruction count. 
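The recognition that TODO suggests amounts to checking whether a 32-bit constant is one byte replicated four times. A minimal sketch under that reading; isByteSplat is a hypothetical helper, not an existing LLVM API:

#include <cstdint>

// Returns true when the 32-bit constant C is a single byte replicated four
// times ("bbbb"), the pattern ANDBI/ORBI can materialize directly.
static bool isByteSplat(uint32_t C, uint8_t &Byte) {
  Byte = static_cast<uint8_t>(C & 0xFFu);
  return C == 0x01010101u * Byte;
}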
+//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Pseudo instructions: +//===----------------------------------------------------------------------===// + +let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { + def ADJCALLSTACKDOWN : Pseudo<(outs), (ins u16imm_i32:$amt), + "${:comment} ADJCALLSTACKDOWN", + [(callseq_start timm:$amt)]>; + def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt), + "${:comment} ADJCALLSTACKUP", + [(callseq_end timm:$amt)]>; + def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ), + "$targ:\t${:comment}branch hint target",[ ]>; +} + +//===----------------------------------------------------------------------===// +// Loads: +// NB: The ordering is actually important, since the instruction selection +// will try each of the instructions in sequence, i.e., the D-form first with +// the 10-bit displacement, then the A-form with the 16 bit displacement, and +// finally the X-form with the register-register. +//===----------------------------------------------------------------------===// + +let canFoldAsLoad = 1 in { + class LoadDFormVec<ValueType vectype> + : RI10Form<0b00101100, (outs VECREG:$rT), (ins dformaddr:$src), + "lqd\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load dform_addr:$src))]> + { } + + class LoadDForm<RegisterClass rclass> + : RI10Form<0b00101100, (outs rclass:$rT), (ins dformaddr:$src), + "lqd\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load dform_addr:$src))]> + { } + + multiclass LoadDForms + { + def v16i8: LoadDFormVec<v16i8>; + def v8i16: LoadDFormVec<v8i16>; + def v4i32: LoadDFormVec<v4i32>; + def v2i64: LoadDFormVec<v2i64>; + def v4f32: LoadDFormVec<v4f32>; + def v2f64: LoadDFormVec<v2f64>; + + def r128: LoadDForm<GPRC>; + def r64: LoadDForm<R64C>; + def r32: LoadDForm<R32C>; + def f32: LoadDForm<R32FP>; + def f64: LoadDForm<R64FP>; + def r16: LoadDForm<R16C>; + def r8: LoadDForm<R8C>; + } + + class LoadAFormVec<ValueType vectype> + : RI16Form<0b100001100, (outs VECREG:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load aform_addr:$src))]> + { } + + class LoadAForm<RegisterClass rclass> + : RI16Form<0b100001100, (outs rclass:$rT), (ins addr256k:$src), + "lqa\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load aform_addr:$src))]> + { } + + multiclass LoadAForms + { + def v16i8: LoadAFormVec<v16i8>; + def v8i16: LoadAFormVec<v8i16>; + def v4i32: LoadAFormVec<v4i32>; + def v2i64: LoadAFormVec<v2i64>; + def v4f32: LoadAFormVec<v4f32>; + def v2f64: LoadAFormVec<v2f64>; + + def r128: LoadAForm<GPRC>; + def r64: LoadAForm<R64C>; + def r32: LoadAForm<R32C>; + def f32: LoadAForm<R32FP>; + def f64: LoadAForm<R64FP>; + def r16: LoadAForm<R16C>; + def r8: LoadAForm<R8C>; + } + + class LoadXFormVec<ValueType vectype> + : RRForm<0b00100011100, (outs VECREG:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set (vectype VECREG:$rT), (load xform_addr:$src))]> + { } + + class LoadXForm<RegisterClass rclass> + : RRForm<0b00100011100, (outs rclass:$rT), (ins memrr:$src), + "lqx\t$rT, $src", + LoadStore, + [(set rclass:$rT, (load xform_addr:$src))]> + { } + + multiclass LoadXForms + { + def v16i8: LoadXFormVec<v16i8>; + def v8i16: LoadXFormVec<v8i16>; + def v4i32: LoadXFormVec<v4i32>; + def v2i64: LoadXFormVec<v2i64>; + def v4f32: LoadXFormVec<v4f32>; + def v2f64: LoadXFormVec<v2f64>; + + def r128: LoadXForm<GPRC>; + def r64: LoadXForm<R64C>; + def r32: LoadXForm<R32C>; + 
def f32: LoadXForm<R32FP>; + def f64: LoadXForm<R64FP>; + def r16: LoadXForm<R16C>; + def r8: LoadXForm<R8C>; + } + + defm LQA : LoadAForms; + defm LQD : LoadDForms; + defm LQX : LoadXForms; + +/* Load quadword, PC relative: Not much use at this point in time. + Might be of use later for relocatable code. It's effectively the + same as LQA, but uses PC-relative addressing. + def LQR : RI16Form<0b111001100, (outs VECREG:$rT), (ins s16imm:$disp), + "lqr\t$rT, $disp", LoadStore, + [(set VECREG:$rT, (load iaddr:$disp))]>; + */ +} + +//===----------------------------------------------------------------------===// +// Stores: +//===----------------------------------------------------------------------===// +class StoreDFormVec<ValueType vectype> + : RI10Form<0b00100100, (outs), (ins VECREG:$rT, dformaddr:$src), + "stqd\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), dform_addr:$src)]> +{ } + +class StoreDForm<RegisterClass rclass> + : RI10Form<0b00100100, (outs), (ins rclass:$rT, dformaddr:$src), + "stqd\t$rT, $src", + LoadStore, + [(store rclass:$rT, dform_addr:$src)]> +{ } + +multiclass StoreDForms +{ + def v16i8: StoreDFormVec<v16i8>; + def v8i16: StoreDFormVec<v8i16>; + def v4i32: StoreDFormVec<v4i32>; + def v2i64: StoreDFormVec<v2i64>; + def v4f32: StoreDFormVec<v4f32>; + def v2f64: StoreDFormVec<v2f64>; + + def r128: StoreDForm<GPRC>; + def r64: StoreDForm<R64C>; + def r32: StoreDForm<R32C>; + def f32: StoreDForm<R32FP>; + def f64: StoreDForm<R64FP>; + def r16: StoreDForm<R16C>; + def r8: StoreDForm<R8C>; +} + +class StoreAFormVec<ValueType vectype> + : RI16Form<0b0010010, (outs), (ins VECREG:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), aform_addr:$src)]>; + +class StoreAForm<RegisterClass rclass> + : RI16Form<0b001001, (outs), (ins rclass:$rT, addr256k:$src), + "stqa\t$rT, $src", + LoadStore, + [(store rclass:$rT, aform_addr:$src)]>; + +multiclass StoreAForms +{ + def v16i8: StoreAFormVec<v16i8>; + def v8i16: StoreAFormVec<v8i16>; + def v4i32: StoreAFormVec<v4i32>; + def v2i64: StoreAFormVec<v2i64>; + def v4f32: StoreAFormVec<v4f32>; + def v2f64: StoreAFormVec<v2f64>; + + def r128: StoreAForm<GPRC>; + def r64: StoreAForm<R64C>; + def r32: StoreAForm<R32C>; + def f32: StoreAForm<R32FP>; + def f64: StoreAForm<R64FP>; + def r16: StoreAForm<R16C>; + def r8: StoreAForm<R8C>; +} + +class StoreXFormVec<ValueType vectype> + : RRForm<0b00100100, (outs), (ins VECREG:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store (vectype VECREG:$rT), xform_addr:$src)]> +{ } + +class StoreXForm<RegisterClass rclass> + : RRForm<0b00100100, (outs), (ins rclass:$rT, memrr:$src), + "stqx\t$rT, $src", + LoadStore, + [(store rclass:$rT, xform_addr:$src)]> +{ } + +multiclass StoreXForms +{ + def v16i8: StoreXFormVec<v16i8>; + def v8i16: StoreXFormVec<v8i16>; + def v4i32: StoreXFormVec<v4i32>; + def v2i64: StoreXFormVec<v2i64>; + def v4f32: StoreXFormVec<v4f32>; + def v2f64: StoreXFormVec<v2f64>; + + def r128: StoreXForm<GPRC>; + def r64: StoreXForm<R64C>; + def r32: StoreXForm<R32C>; + def f32: StoreXForm<R32FP>; + def f64: StoreXForm<R64FP>; + def r16: StoreXForm<R16C>; + def r8: StoreXForm<R8C>; +} + +defm STQD : StoreDForms; +defm STQA : StoreAForms; +defm STQX : StoreXForms; + +/* Store quadword, PC relative: Not much use at this point in time. Might + be useful for relocatable code. 
+def STQR : RI16Form<0b111000100, (outs), (ins VECREG:$rT, s16imm:$disp), + "stqr\t$rT, $disp", LoadStore, + [(store VECREG:$rT, iaddr:$disp)]>; +*/ + +//===----------------------------------------------------------------------===// +// Generate Controls for Insertion: +//===----------------------------------------------------------------------===// + +def CBD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cbd\t$rT, $src", ShuffleOp, + [(set (v16i8 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CBX: RRForm<0b00101011100, (outs VECREG:$rT), (ins memrr:$src), + "cbx\t$rT, $src", ShuffleOp, + [(set (v16i8 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CHD: RI7Form<0b10101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "chd\t$rT, $src", ShuffleOp, + [(set (v8i16 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CHX: RRForm<0b10101011100, (outs VECREG:$rT), (ins memrr:$src), + "chx\t$rT, $src", ShuffleOp, + [(set (v8i16 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CWD: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cwd\t$rT, $src", ShuffleOp, + [(set (v4i32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CWX: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), + "cwx\t$rT, $src", ShuffleOp, + [(set (v4i32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CWDf32: RI7Form<0b01101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cwd\t$rT, $src", ShuffleOp, + [(set (v4f32 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CWXf32: RRForm<0b01101011100, (outs VECREG:$rT), (ins memrr:$src), + "cwx\t$rT, $src", ShuffleOp, + [(set (v4f32 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CDD: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cdd\t$rT, $src", ShuffleOp, + [(set (v2i64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CDX: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), + "cdx\t$rT, $src", ShuffleOp, + [(set (v2i64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +def CDDf64: RI7Form<0b11101111100, (outs VECREG:$rT), (ins shufaddr:$src), + "cdd\t$rT, $src", ShuffleOp, + [(set (v2f64 VECREG:$rT), (SPUshufmask dform2_addr:$src))]>; + +def CDXf64: RRForm<0b11101011100, (outs VECREG:$rT), (ins memrr:$src), + "cdx\t$rT, $src", ShuffleOp, + [(set (v2f64 VECREG:$rT), (SPUshufmask xform_addr:$src))]>; + +//===----------------------------------------------------------------------===// +// Constant formation: +//===----------------------------------------------------------------------===// + +def ILHv8i16: + RI16Form<0b110000010, (outs VECREG:$rT), (ins s16imm:$val), + "ilh\t$rT, $val", ImmLoad, + [(set (v8i16 VECREG:$rT), (v8i16 v8i16SExt16Imm:$val))]>; + +def ILHr16: + RI16Form<0b110000010, (outs R16C:$rT), (ins s16imm:$val), + "ilh\t$rT, $val", ImmLoad, + [(set R16C:$rT, immSExt16:$val)]>; + +// Cell SPU doesn't have a native 8-bit immediate load, but ILH works ("with +// the right constant") +def ILHr8: + RI16Form<0b110000010, (outs R8C:$rT), (ins s16imm_i8:$val), + "ilh\t$rT, $val", ImmLoad, + [(set R8C:$rT, immSExt8:$val)]>; + +// IL does sign extension! 
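A trivial sketch of that sign extension, shown only to make the "il" immediate behaviour concrete (signExtendIL is a hypothetical helper):

#include <cstdint>

// "il" sign-extends its 16-bit immediate into the full register width.
static int32_t signExtendIL(uint16_t Imm) {
  return static_cast<int32_t>(static_cast<int16_t>(Imm));
}
// e.g. signExtendIL(0xFFFF) == -1, signExtendIL(0x7FFF) == 32767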
+ +class ILInst<dag OOL, dag IOL, list<dag> pattern>: + RI16Form<0b100000010, OOL, IOL, "il\t$rT, $val", + ImmLoad, pattern>; + +class ILVecInst<ValueType vectype, Operand immtype, PatLeaf xform>: + ILInst<(outs VECREG:$rT), (ins immtype:$val), + [(set (vectype VECREG:$rT), (vectype xform:$val))]>; + +class ILRegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>: + ILInst<(outs rclass:$rT), (ins immtype:$val), + [(set rclass:$rT, xform:$val)]>; + +multiclass ImmediateLoad +{ + def v2i64: ILVecInst<v2i64, s16imm_i64, v2i64SExt16Imm>; + def v4i32: ILVecInst<v4i32, s16imm_i32, v4i32SExt16Imm>; + + // TODO: Need v2f64, v4f32 + + def r64: ILRegInst<R64C, s16imm_i64, immSExt16>; + def r32: ILRegInst<R32C, s16imm_i32, immSExt16>; + def f32: ILRegInst<R32FP, s16imm_f32, fpimmSExt16>; + def f64: ILRegInst<R64FP, s16imm_f64, fpimmSExt16>; +} + +defm IL : ImmediateLoad; + +class ILHUInst<dag OOL, dag IOL, list<dag> pattern>: + RI16Form<0b010000010, OOL, IOL, "ilhu\t$rT, $val", + ImmLoad, pattern>; + +class ILHUVecInst<ValueType vectype, Operand immtype, PatLeaf xform>: + ILHUInst<(outs VECREG:$rT), (ins immtype:$val), + [(set (vectype VECREG:$rT), (vectype xform:$val))]>; + +class ILHURegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>: + ILHUInst<(outs rclass:$rT), (ins immtype:$val), + [(set rclass:$rT, xform:$val)]>; + +multiclass ImmLoadHalfwordUpper +{ + def v2i64: ILHUVecInst<v2i64, u16imm_i64, immILHUvec_i64>; + def v4i32: ILHUVecInst<v4i32, u16imm_i32, immILHUvec>; + + def r64: ILHURegInst<R64C, u16imm_i64, hi16>; + def r32: ILHURegInst<R32C, u16imm_i32, hi16>; + + // Loads the high portion of an address + def hi: ILHURegInst<R32C, symbolHi, hi16>; + + // Used in custom lowering constant SFP loads: + def f32: ILHURegInst<R32FP, f16imm, hi16_f32>; +} + +defm ILHU : ImmLoadHalfwordUpper; + +// Immediate load address (can also be used to load 18-bit unsigned constants, +// see the zext 16->32 pattern) + +class ILAInst<dag OOL, dag IOL, list<dag> pattern>: + RI18Form<0b1000010, OOL, IOL, "ila\t$rT, $val", + LoadNOP, pattern>; + +class ILAVecInst<ValueType vectype, Operand immtype, PatLeaf xform>: + ILAInst<(outs VECREG:$rT), (ins immtype:$val), + [(set (vectype VECREG:$rT), (vectype xform:$val))]>; + +class ILARegInst<RegisterClass rclass, Operand immtype, PatLeaf xform>: + ILAInst<(outs rclass:$rT), (ins immtype:$val), + [(set rclass:$rT, xform:$val)]>; + +multiclass ImmLoadAddress +{ + def v2i64: ILAVecInst<v2i64, u18imm, v2i64Uns18Imm>; + def v4i32: ILAVecInst<v4i32, u18imm, v4i32Uns18Imm>; + + def r64: ILARegInst<R64C, u18imm_i64, imm18>; + def r32: ILARegInst<R32C, u18imm, imm18>; + def f32: ILARegInst<R32FP, f18imm, fpimm18>; + def f64: ILARegInst<R64FP, f18imm_f64, fpimm18>; + + def hi: ILARegInst<R32C, symbolHi, imm18>; + def lo: ILARegInst<R32C, symbolLo, imm18>; + + def lsa: ILAInst<(outs R32C:$rT), (ins symbolLSA:$val), + [(set R32C:$rT, imm18:$val)]>; +} + +defm ILA : ImmLoadAddress; + +// Immediate OR, Halfword Lower: The "other" part of loading large constants +// into 32-bit registers. See the anonymous pattern Pat<(i32 imm:$imm), ...> +// Note that these are really two operand instructions, but they're encoded +// as three operands with the first two arguments tied-to each other. 
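A minimal sketch of the split this implies for a full 32-bit constant, assuming the ilhu-then-iohl sequence the comments above describe (splitConstant and ILHU_IOHL are hypothetical names used only for illustration):

#include <cstdint>

struct ILHU_IOHL { uint16_t Hi, Lo; };

// ILHU materializes the upper halfword, IOHL then ORs in the lower halfword
// of the same 32-bit constant.
static ILHU_IOHL splitConstant(uint32_t C) {
  return { static_cast<uint16_t>(C >> 16), static_cast<uint16_t>(C & 0xFFFFu) };
}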
+ +class IOHLInst<dag OOL, dag IOL, list<dag> pattern>: + RI16Form<0b100000110, OOL, IOL, "iohl\t$rT, $val", + ImmLoad, pattern>, + RegConstraint<"$rS = $rT">, + NoEncode<"$rS">; + +class IOHLVecInst<ValueType vectype, Operand immtype /* , PatLeaf xform */>: + IOHLInst<(outs VECREG:$rT), (ins VECREG:$rS, immtype:$val), + [/* no pattern */]>; + +class IOHLRegInst<RegisterClass rclass, Operand immtype /* , PatLeaf xform */>: + IOHLInst<(outs rclass:$rT), (ins rclass:$rS, immtype:$val), + [/* no pattern */]>; + +multiclass ImmOrHalfwordLower +{ + def v2i64: IOHLVecInst<v2i64, u16imm_i64>; + def v4i32: IOHLVecInst<v4i32, u16imm_i32>; + + def r32: IOHLRegInst<R32C, i32imm>; + def f32: IOHLRegInst<R32FP, f32imm>; + + def lo: IOHLRegInst<R32C, symbolLo>; +} + +defm IOHL: ImmOrHalfwordLower; + +// Form select mask for bytes using immediate, used in conjunction with the +// SELB instruction: + +class FSMBIVec<ValueType vectype>: + RI16Form<0b101001100, (outs VECREG:$rT), (ins u16imm:$val), + "fsmbi\t$rT, $val", + SelectOp, + [(set (vectype VECREG:$rT), (SPUselmask (i16 immU16:$val)))]>; + +multiclass FormSelectMaskBytesImm +{ + def v16i8: FSMBIVec<v16i8>; + def v8i16: FSMBIVec<v8i16>; + def v4i32: FSMBIVec<v4i32>; + def v2i64: FSMBIVec<v2i64>; +} + +defm FSMBI : FormSelectMaskBytesImm; + +// fsmb: Form select mask for bytes. N.B. Input operand, $rA, is 16-bits +class FSMBInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b01101101100, OOL, IOL, "fsmb\t$rT, $rA", SelectOp, + pattern>; + +class FSMBRegInst<RegisterClass rclass, ValueType vectype>: + FSMBInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMBVecInst<ValueType vectype>: + FSMBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskBits { + def v16i8_r16: FSMBRegInst<R16C, v16i8>; + def v16i8: FSMBVecInst<v16i8>; +} + +defm FSMB: FormSelectMaskBits; + +// fsmh: Form select mask for halfwords. N.B., Input operand, $rA, is +// only 8-bits wide (even though it's input as 16-bits here) + +class FSMHInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b10101101100, OOL, IOL, "fsmh\t$rT, $rA", SelectOp, + pattern>; + +class FSMHRegInst<RegisterClass rclass, ValueType vectype>: + FSMHInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMHVecInst<ValueType vectype>: + FSMHInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), + (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskHalfword { + def v8i16_r16: FSMHRegInst<R16C, v8i16>; + def v8i16: FSMHVecInst<v8i16>; +} + +defm FSMH: FormSelectMaskHalfword; + +// fsm: Form select mask for words. Like the other fsm* instructions, +// only the lower 4 bits of $rA are significant. 
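A sketch of the expansion these fsm comments describe; the bit-to-word ordering is an assumption made for illustration and formSelectMaskWord is a hypothetical helper:

#include <array>
#include <cstdint>

// Expands the low four bits of RA into a per-word select mask; an all-ones
// word selects one input of SELB, an all-zeros word selects the other.
// Mapping bit 3 to word 0 is an assumption made for this illustration.
static std::array<uint32_t, 4> formSelectMaskWord(uint32_t RA) {
  std::array<uint32_t, 4> Mask;
  for (int i = 0; i < 4; ++i)
    Mask[i] = ((RA >> (3 - i)) & 1u) ? 0xFFFFFFFFu : 0u;
  return Mask;
}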
+ +class FSMInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b00101101100, OOL, IOL, "fsm\t$rT, $rA", SelectOp, + pattern>; + +class FSMRegInst<ValueType vectype, RegisterClass rclass>: + FSMInst<(outs VECREG:$rT), (ins rclass:$rA), + [(set (vectype VECREG:$rT), (SPUselmask rclass:$rA))]>; + +class FSMVecInst<ValueType vectype>: + FSMInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), (SPUselmask (vectype VECREG:$rA)))]>; + +multiclass FormSelectMaskWord { + def v4i32: FSMVecInst<v4i32>; + + def r32 : FSMRegInst<v4i32, R32C>; + def r16 : FSMRegInst<v4i32, R16C>; +} + +defm FSM : FormSelectMaskWord; + +// Special case when used for i64 math operations +multiclass FormSelectMaskWord64 { + def r32 : FSMRegInst<v2i64, R32C>; + def r16 : FSMRegInst<v2i64, R16C>; +} + +defm FSM64 : FormSelectMaskWord64; + +//===----------------------------------------------------------------------===// +// Integer and Logical Operations: +//===----------------------------------------------------------------------===// + +def AHv8i16: + RRForm<0b00010011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "ah\t$rT, $rA, $rB", IntegerOp, + [(set (v8i16 VECREG:$rT), (int_spu_si_ah VECREG:$rA, VECREG:$rB))]>; + +def : Pat<(add (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (AHv8i16 VECREG:$rA, VECREG:$rB)>; + +def AHr16: + RRForm<0b00010011000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + "ah\t$rT, $rA, $rB", IntegerOp, + [(set R16C:$rT, (add R16C:$rA, R16C:$rB))]>; + +def AHIvec: + RI10Form<0b10111000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "ahi\t$rT, $rA, $val", IntegerOp, + [(set (v8i16 VECREG:$rT), (add (v8i16 VECREG:$rA), + v8i16SExt10Imm:$val))]>; + +def AHIr16: + RI10Form<0b10111000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + "ahi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (add R16C:$rA, i16ImmSExt10:$val))]>; + +// v4i32, i32 add instruction: + +class AInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00000011000, OOL, IOL, + "a\t$rT, $rA, $rB", IntegerOp, + pattern>; + +class AVecInst<ValueType vectype>: + AInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; + +class ARegInst<RegisterClass rclass>: + AInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (add rclass:$rA, rclass:$rB))]>; + +multiclass AddInstruction { + def v4i32: AVecInst<v4i32>; + def v16i8: AVecInst<v16i8>; + def r32: ARegInst<R32C>; +} + +defm A : AddInstruction; + +class AIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b00111000, OOL, IOL, + "ai\t$rT, $rA, $val", IntegerOp, + pattern>; + +class AIVecInst<ValueType vectype, PatLeaf immpred>: + AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (vectype VECREG:$rT), (add (vectype VECREG:$rA), immpred:$val))]>; + +class AIFPVecInst<ValueType vectype, PatLeaf immpred>: + AIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [/* no pattern */]>; + +class AIRegInst<RegisterClass rclass, PatLeaf immpred>: + AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), + [(set rclass:$rT, (add rclass:$rA, immpred:$val))]>; + +// This is used to add epsilons to floating point numbers in the f32 fdiv code: +class AIFPInst<RegisterClass rclass, PatLeaf immpred>: + AIInst<(outs rclass:$rT), (ins rclass:$rA, s10imm_i32:$val), + [/* no pattern */]>; + +multiclass AddImmediate { + def v4i32: AIVecInst<v4i32, v4i32SExt10Imm>; + + def r32: AIRegInst<R32C, i32ImmSExt10>; + + def v4f32: AIFPVecInst<v4f32, v4i32SExt10Imm>; + 
def f32: AIFPInst<R32FP, i32ImmSExt10>; +} + +defm AI : AddImmediate; + +def SFHvec: + RRForm<0b00010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "sfh\t$rT, $rA, $rB", IntegerOp, + [(set (v8i16 VECREG:$rT), (sub (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + +def SFHr16: + RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + "sfh\t$rT, $rA, $rB", IntegerOp, + [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>; + +def SFHIvec: + RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "sfhi\t$rT, $rA, $val", IntegerOp, + [(set (v8i16 VECREG:$rT), (sub v8i16SExt10Imm:$val, + (v8i16 VECREG:$rA)))]>; + +def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + "sfhi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (sub i16ImmSExt10:$val, R16C:$rA))]>; + +def SFvec : RRForm<0b00000010000, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB), + "sf\t$rT, $rA, $rB", IntegerOp, + [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>; + + +def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + "sf\t$rT, $rA, $rB", IntegerOp, + [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>; + +def SFIvec: + RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + "sfi\t$rT, $rA, $val", IntegerOp, + [(set (v4i32 VECREG:$rT), (sub v4i32SExt10Imm:$val, + (v4i32 VECREG:$rA)))]>; + +def SFIr32 : RI10Form<0b00110000, (outs R32C:$rT), + (ins R32C:$rA, s10imm_i32:$val), + "sfi\t$rT, $rA, $val", IntegerOp, + [(set R32C:$rT, (sub i32ImmSExt10:$val, R32C:$rA))]>; + +// ADDX: only available in vector form, doesn't match a pattern. +class ADDXInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00000010110, OOL, IOL, + "addx\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ADDXVecInst<ValueType vectype>: + ADDXInst<(outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), + [/* no pattern */]>, + RegConstraint<"$rCarry = $rT">, + NoEncode<"$rCarry">; + +class ADDXRegInst<RegisterClass rclass>: + ADDXInst<(outs rclass:$rT), + (ins rclass:$rA, rclass:$rB, rclass:$rCarry), + [/* no pattern */]>, + RegConstraint<"$rCarry = $rT">, + NoEncode<"$rCarry">; + +multiclass AddExtended { + def v2i64 : ADDXVecInst<v2i64>; + def v4i32 : ADDXVecInst<v4i32>; + def r64 : ADDXRegInst<R64C>; + def r32 : ADDXRegInst<R32C>; +} + +defm ADDX : AddExtended; + +// CG: Generate carry for add +class CGInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01000011000, OOL, IOL, + "cg\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class CGVecInst<ValueType vectype>: + CGInst<(outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + +class CGRegInst<RegisterClass rclass>: + CGInst<(outs rclass:$rT), + (ins rclass:$rA, rclass:$rB), + [/* no pattern */]>; + +multiclass CarryGenerate { + def v2i64 : CGVecInst<v2i64>; + def v4i32 : CGVecInst<v4i32>; + def r64 : CGRegInst<R64C>; + def r32 : CGRegInst<R32C>; +} + +defm CG : CarryGenerate; + +// SFX: Subract from, extended. 
This is used in conjunction with BG to subtract +// with carry (borrow, in this case) +class SFXInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10000010110, OOL, IOL, + "sfx\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class SFXVecInst<ValueType vectype>: + SFXInst<(outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rCarry), + [/* no pattern */]>, + RegConstraint<"$rCarry = $rT">, + NoEncode<"$rCarry">; + +class SFXRegInst<RegisterClass rclass>: + SFXInst<(outs rclass:$rT), + (ins rclass:$rA, rclass:$rB, rclass:$rCarry), + [/* no pattern */]>, + RegConstraint<"$rCarry = $rT">, + NoEncode<"$rCarry">; + +multiclass SubtractExtended { + def v2i64 : SFXVecInst<v2i64>; + def v4i32 : SFXVecInst<v4i32>; + def r64 : SFXRegInst<R64C>; + def r32 : SFXRegInst<R32C>; +} + +defm SFX : SubtractExtended; + +// BG: only available in vector form, doesn't match a pattern. +class BGInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01000010000, OOL, IOL, + "bg\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class BGVecInst<ValueType vectype>: + BGInst<(outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + +class BGRegInst<RegisterClass rclass>: + BGInst<(outs rclass:$rT), + (ins rclass:$rA, rclass:$rB), + [/* no pattern */]>; + +multiclass BorrowGenerate { + def v4i32 : BGVecInst<v4i32>; + def v2i64 : BGVecInst<v2i64>; + def r64 : BGRegInst<R64C>; + def r32 : BGRegInst<R32C>; +} + +defm BG : BorrowGenerate; + +// BGX: Borrow generate, extended. +def BGXvec: + RRForm<0b11000010110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, + VECREG:$rCarry), + "bgx\t$rT, $rA, $rB", IntegerOp, + []>, + RegConstraint<"$rCarry = $rT">, + NoEncode<"$rCarry">; + +// Halfword multiply variants: +// N.B: These can be used to build up larger quantities (16x16 -> 32) + +def MPYv8i16: + RRForm<0b00100011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "mpy\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYr16: + RRForm<0b00100011110, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + "mpy\t$rT, $rA, $rB", IntegerMulDiv, + [(set R16C:$rT, (mul R16C:$rA, R16C:$rB))]>; + +// Unsigned 16-bit multiply: + +class MPYUInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00110011110, OOL, IOL, + "mpyu\t$rT, $rA, $rB", IntegerMulDiv, + pattern>; + +def MPYUv4i32: + MPYUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + +def MPYUr16: + MPYUInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R32C:$rT, (mul (zext R16C:$rA), (zext R16C:$rB)))]>; + +def MPYUr32: + MPYUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [/* no pattern */]>; + +// mpyi: multiply 16 x s10imm -> 32 result. + +class MPYIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b00101110, OOL, IOL, + "mpyi\t$rT, $rA, $val", IntegerMulDiv, + pattern>; + +def MPYIvec: + MPYIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (mul (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; + +def MPYIr16: + MPYIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (mul R16C:$rA, i16ImmSExt10:$val))]>; + +// mpyui: same issues as other multiplies, plus, this doesn't match a +// pattern... 
but may be used during target DAG selection or lowering + +class MPYUIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b10101110, OOL, IOL, + "mpyui\t$rT, $rA, $val", IntegerMulDiv, + pattern>; + +def MPYUIvec: + MPYUIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + []>; + +def MPYUIr16: + MPYUIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + []>; + +// mpya: 16 x 16 + 16 -> 32 bit result +class MPYAInst<dag OOL, dag IOL, list<dag> pattern>: + RRRForm<0b0011, OOL, IOL, + "mpya\t$rT, $rA, $rB, $rC", IntegerMulDiv, + pattern>; + +def MPYAv4i32: + MPYAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (v4i32 VECREG:$rT), + (add (v4i32 (bitconvert (mul (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))), + (v4i32 VECREG:$rC)))]>; + +def MPYAr32: + MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), + [(set R32C:$rT, (add (sext (mul R16C:$rA, R16C:$rB)), + R32C:$rC))]>; + +def MPYAr32_sext: + MPYAInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB, R32C:$rC), + [(set R32C:$rT, (add (mul (sext R16C:$rA), (sext R16C:$rB)), + R32C:$rC))]>; + +def MPYAr32_sextinreg: + MPYAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB, R32C:$rC), + [(set R32C:$rT, (add (mul (sext_inreg R32C:$rA, i16), + (sext_inreg R32C:$rB, i16)), + R32C:$rC))]>; + +// mpyh: multiply high, used to synthesize 32-bit multiplies +class MPYHInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10100011110, OOL, IOL, + "mpyh\t$rT, $rA, $rB", IntegerMulDiv, + pattern>; + +def MPYHv4i32: + MPYHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern */]>; + +def MPYHr32: + MPYHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [/* no pattern */]>; + +// mpys: multiply high and shift right (returns the top half of +// a 16-bit multiply, sign extended to 32 bits.) + +class MPYSInst<dag OOL, dag IOL>: + RRForm<0b11100011110, OOL, IOL, + "mpys\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYSv4i32: + MPYSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYSr16: + MPYSInst<(outs R32C:$rT), (ins R16C:$rA, R16C:$rB)>; + +// mpyhh: multiply high-high (returns the 32-bit result from multiplying +// the top 16 bits of the $rA, $rB) + +class MPYHHInst<dag OOL, dag IOL>: + RRForm<0b01100011110, OOL, IOL, + "mpyhh\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHv8i16: + MPYHHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHr32: + MPYHHInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +// mpyhha: Multiply high-high, add to $rT: + +class MPYHHAInst<dag OOL, dag IOL>: + RRForm<0b01100010110, OOL, IOL, + "mpyhha\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHAvec: + MPYHHAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHAr32: + MPYHHAInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +// mpyhhu: Multiply high-high, unsigned, e.g.: +// +// +-------+-------+ +-------+-------+ +---------+ +// | a0 . a1 | x | b0 . 
b1 | = | a0 x b0 | +// +-------+-------+ +-------+-------+ +---------+ +// +// where a0, b0 are the upper 16 bits of the 32-bit word + +class MPYHHUInst<dag OOL, dag IOL>: + RRForm<0b01110011110, OOL, IOL, + "mpyhhu\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHUv4i32: + MPYHHUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHUr32: + MPYHHUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +// mpyhhau: Multiply high-high, unsigned + +class MPYHHAUInst<dag OOL, dag IOL>: + RRForm<0b01110010110, OOL, IOL, + "mpyhhau\t$rT, $rA, $rB", IntegerMulDiv, + [/* no pattern */]>; + +def MPYHHAUvec: + MPYHHAUInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB)>; + +def MPYHHAUr32: + MPYHHAUInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// clz: Count leading zeroes +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +class CLZInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b10100101010, OOL, IOL, "clz\t$rT, $rA", + IntegerOp, pattern>; + +class CLZRegInst<RegisterClass rclass>: + CLZInst<(outs rclass:$rT), (ins rclass:$rA), + [(set rclass:$rT, (ctlz rclass:$rA))]>; + +class CLZVecInst<ValueType vectype>: + CLZInst<(outs VECREG:$rT), (ins VECREG:$rA), + [(set (vectype VECREG:$rT), (ctlz (vectype VECREG:$rA)))]>; + +multiclass CountLeadingZeroes { + def v4i32 : CLZVecInst<v4i32>; + def r32 : CLZRegInst<R32C>; +} + +defm CLZ : CountLeadingZeroes; + +// cntb: Count ones in bytes (aka "population count") +// +// NOTE: This instruction is really a vector instruction, but the custom +// lowering code uses it in unorthodox ways to support CTPOP for other +// data types! + +def CNTBv16i8: + RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), + "cntb\t$rT, $rA", IntegerOp, + [(set (v16i8 VECREG:$rT), (SPUcntb (v16i8 VECREG:$rA)))]>; + +def CNTBv8i16 : + RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), + "cntb\t$rT, $rA", IntegerOp, + [(set (v8i16 VECREG:$rT), (SPUcntb (v8i16 VECREG:$rA)))]>; + +def CNTBv4i32 : + RRForm_1<0b00101101010, (outs VECREG:$rT), (ins VECREG:$rA), + "cntb\t$rT, $rA", IntegerOp, + [(set (v4i32 VECREG:$rT), (SPUcntb (v4i32 VECREG:$rA)))]>; + +// gbb: Gather the low order bits from each byte in $rA into a single 16-bit +// quantity stored into $rT's slot 0, upper 16 bits are zeroed, as are +// slots 1-3. +// +// Note: This instruction "pairs" with the fsmb instruction for all of the +// various types defined here. +// +// Note 2: The "VecInst" and "RegInst" forms refer to the result being either +// a vector or register. + +class GBBInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b01001101100, OOL, IOL, "gbb\t$rT, $rA", GatherOp, pattern>; + +class GBBRegInst<RegisterClass rclass, ValueType vectype>: + GBBInst<(outs rclass:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +class GBBVecInst<ValueType vectype>: + GBBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +multiclass GatherBitsFromBytes { + def v16i8_r32: GBBRegInst<R32C, v16i8>; + def v16i8_r16: GBBRegInst<R16C, v16i8>; + def v16i8: GBBVecInst<v16i8>; +} + +defm GBB: GatherBitsFromBytes; + +// gbh: Gather all low order bits from each halfword in $rA into a single +// 8-bit quantity stored in $rT's slot 0, with the upper bits of $rT set to 0 +// and slots 1-3 also set to 0. +// +// See notes for GBBInst, above. 
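A sketch of the gbb behaviour described above, gathering the least-significant bit of each byte into slot 0; the byte-to-bit ordering is an assumption for illustration and gatherByteLSBs is a hypothetical helper:

#include <cstdint>

// Collects the least-significant bit of each of the 16 bytes into one 16-bit
// value; byte 0 supplying the most-significant result bit is an assumption.
static uint16_t gatherByteLSBs(const uint8_t Bytes[16]) {
  uint16_t Result = 0;
  for (int i = 0; i < 16; ++i)
    Result = static_cast<uint16_t>((Result << 1) | (Bytes[i] & 1u));
  return Result;
}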
+ +class GBHInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b10001101100, OOL, IOL, "gbh\t$rT, $rA", GatherOp, + pattern>; + +class GBHRegInst<RegisterClass rclass, ValueType vectype>: + GBHInst<(outs rclass:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +class GBHVecInst<ValueType vectype>: + GBHInst<(outs VECREG:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +multiclass GatherBitsHalfword { + def v8i16_r32: GBHRegInst<R32C, v8i16>; + def v8i16_r16: GBHRegInst<R16C, v8i16>; + def v8i16: GBHVecInst<v8i16>; +} + +defm GBH: GatherBitsHalfword; + +// gb: Gather all low order bits from each word in $rA into a single +// 4-bit quantity stored in $rT's slot 0, upper bits in $rT set to 0, +// as well as slots 1-3. +// +// See notes for gbb, above. + +class GBInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b00001101100, OOL, IOL, "gb\t$rT, $rA", GatherOp, + pattern>; + +class GBRegInst<RegisterClass rclass, ValueType vectype>: + GBInst<(outs rclass:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +class GBVecInst<ValueType vectype>: + GBInst<(outs VECREG:$rT), (ins VECREG:$rA), + [/* no pattern */]>; + +multiclass GatherBitsWord { + def v4i32_r32: GBRegInst<R32C, v4i32>; + def v4i32_r16: GBRegInst<R16C, v4i32>; + def v4i32: GBVecInst<v4i32>; +} + +defm GB: GatherBitsWord; + +// avgb: average bytes +def AVGB: + RRForm<0b11001011000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "avgb\t$rT, $rA, $rB", ByteOp, + []>; + +// absdb: absolute difference of bytes +def ABSDB: + RRForm<0b11001010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "absdb\t$rT, $rA, $rB", ByteOp, + []>; + +// sumb: sum bytes into halfwords +def SUMB: + RRForm<0b11001010010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "sumb\t$rT, $rA, $rB", ByteOp, + []>; + +// Sign extension operations: +class XSBHInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b01101101010, OOL, IOL, + "xsbh\t$rDst, $rSrc", + IntegerOp, pattern>; + +class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>: + XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), + pattern>; + +multiclass ExtendByteHalfword { + def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), + [ + /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>; + def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), + [(set R16C:$rDst, (sext R8C:$rSrc))]>; + def r16: XSBHInRegInst<R16C, + [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>; + + // 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit + // quantities to 32-bit quantities via a 32-bit register (see the sext 8->32 + // pattern below). Intentionally doesn't match a pattern because we want the + // sext 8->32 pattern to do the work for us, namely because we need the extra + // XSHWr32. 
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>; + + // Same as the 32-bit version, but for i64 + def r64: XSBHInRegInst<R64C, [/* no pattern */]>; +} + +defm XSBH : ExtendByteHalfword; + +// Sign extend halfwords to words: + +class XSHWInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc", + IntegerOp, pattern>; + +class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>: + XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc), + [(set (out_vectype VECREG:$rDest), + (sext (in_vectype VECREG:$rSrc)))]>; + +class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>: + XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc), + pattern>; + +class XSHWRegInst<RegisterClass rclass>: + XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc), + [(set rclass:$rDest, (sext R16C:$rSrc))]>; + +multiclass ExtendHalfwordWord { + def v4i32: XSHWVecInst<v8i16, v4i32>; + + def r16: XSHWRegInst<R32C>; + + def r32: XSHWInRegInst<R32C, + [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>; + def r64: XSHWInRegInst<R64C, [/* no pattern */]>; +} + +defm XSHW : ExtendHalfwordWord; + +// Sign-extend words to doublewords (32->64 bits) + +class XSWDInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc", + IntegerOp, pattern>; + +class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>: + XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), + [/*(set (out_vectype VECREG:$rDst), + (sext (out_vectype VECREG:$rSrc)))*/]>; + +class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>: + XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc), + [(set out_rclass:$rDst, (sext in_rclass:$rSrc))]>; + +multiclass ExtendWordToDoubleWord { + def v2i64: XSWDVecInst<v4i32, v2i64>; + def r64: XSWDRegInst<R32C, R64C>; + + def r64_inreg: XSWDInst<(outs R64C:$rDst), (ins R64C:$rSrc), + [(set R64C:$rDst, (sext_inreg R64C:$rSrc, i32))]>; +} + +defm XSWD : ExtendWordToDoubleWord; + +// AND operations + +class ANDInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b10000011000, OOL, IOL, "and\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ANDVecInst<ValueType vectype>: + ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (and (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; + +class ANDRegInst<RegisterClass rclass>: + ANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (and rclass:$rA, rclass:$rB))]>; + +multiclass BitwiseAnd +{ + def v16i8: ANDVecInst<v16i8>; + def v8i16: ANDVecInst<v8i16>; + def v4i32: ANDVecInst<v4i32>; + def v2i64: ANDVecInst<v2i64>; + + def r128: ANDRegInst<GPRC>; + def r64: ANDRegInst<R64C>; + def r32: ANDRegInst<R32C>; + def r16: ANDRegInst<R16C>; + def r8: ANDRegInst<R8C>; + + //===--------------------------------------------- + // Special instructions to perform the fabs instruction + def fabs32: ANDInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), + [/* Intentionally does not match a pattern */]>; + + def fabs64: ANDInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), + [/* Intentionally does not match a pattern */]>; + + def fabsvec: ANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* Intentionally does not match a pattern */]>; + + //===--------------------------------------------- + + // Hacked form of AND to zero-extend 16-bit quantities to 32-bit + // quantities -- see 16->32 zext pattern. + // + // This pattern is somewhat artificial, since it might match some + // compiler generated pattern but it is unlikely to do so. 
+ + def i16i32: ANDInst<(outs R32C:$rT), (ins R16C:$rA, R32C:$rB), + [(set R32C:$rT, (and (zext R16C:$rA), R32C:$rB))]>; +} + +defm AND : BitwiseAnd; + + +def vnot_cell_conv : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>; + +// N.B.: vnot_cell_conv is one of those special target selection pattern +// fragments, +// in which we expect there to be a bit_convert on the constant. Bear in mind +// that llvm translates "not <reg>" to "xor <reg>, -1" (or in this case, a +// constant -1 vector.) + +class ANDCInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10000011010, OOL, IOL, "andc\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ANDCVecInst<ValueType vectype, PatFrag vnot_frag = vnot>: + ANDCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (and (vectype VECREG:$rA), + (vnot_frag (vectype VECREG:$rB))))]>; + +class ANDCRegInst<RegisterClass rclass>: + ANDCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (and rclass:$rA, (not rclass:$rB)))]>; + +multiclass AndComplement +{ + def v16i8: ANDCVecInst<v16i8>; + def v8i16: ANDCVecInst<v8i16>; + def v4i32: ANDCVecInst<v4i32>; + def v2i64: ANDCVecInst<v2i64>; + + def r128: ANDCRegInst<GPRC>; + def r64: ANDCRegInst<R64C>; + def r32: ANDCRegInst<R32C>; + def r16: ANDCRegInst<R16C>; + def r8: ANDCRegInst<R8C>; + + // Sometimes, the xor pattern has a bitcast constant: + def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>; +} + +defm ANDC : AndComplement; + +class ANDBIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b01101000, OOL, IOL, "andbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass AndByteImm +{ + def v16i8: ANDBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (v16i8 VECREG:$rT), + (and (v16i8 VECREG:$rA), + (v16i8 v16i8U8Imm:$val)))]>; + + def r8: ANDBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + [(set R8C:$rT, (and R8C:$rA, immU8:$val))]>; +} + +defm ANDBI : AndByteImm; + +class ANDHIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b10101000, OOL, IOL, "andhi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass AndHalfwordImm +{ + def v8i16: ANDHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (and (v8i16 VECREG:$rA), v8i16SExt10Imm:$val))]>; + + def r16: ANDHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), + [(set R16C:$rT, (and R16C:$rA, i16ImmUns10:$val))]>; + + // Zero-extend i8 to i16: + def i8i16: ANDHIInst<(outs R16C:$rT), (ins R8C:$rA, u10imm:$val), + [(set R16C:$rT, (and (zext R8C:$rA), i16ImmUns10:$val))]>; +} + +defm ANDHI : AndHalfwordImm; + +class ANDIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b00101000, OOL, IOL, "andi\t$rT, $rA, $val", + IntegerOp, pattern>; + +multiclass AndWordImm +{ + def v4i32: ANDIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (and (v4i32 VECREG:$rA), v4i32SExt10Imm:$val))]>; + + def r32: ANDIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (and R32C:$rA, i32ImmSExt10:$val))]>; + + // Hacked form of ANDI to zero-extend i8 quantities to i32. See the zext 8->32 + // pattern below. + def i8i32: ANDIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), + [(set R32C:$rT, + (and (zext R8C:$rA), i32ImmSExt10:$val))]>; + + // Hacked form of ANDI to zero-extend i16 quantities to i32. See the + // zext 16->32 pattern below. 
+ // + // Note that this pattern is somewhat artificial, since it might match + // something the compiler generates but is unlikely to occur in practice. + def i16i32: ANDIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), + [(set R32C:$rT, + (and (zext R16C:$rA), i32ImmSExt10:$val))]>; +} + +defm ANDI : AndWordImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Bitwise OR group: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +// Bitwise "or" (N.B.: These are also register-register copy instructions...) +class ORInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10000010000, OOL, IOL, "or\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ORVecInst<ValueType vectype>: + ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; + +class ORRegInst<RegisterClass rclass>: + ORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (or rclass:$rA, rclass:$rB))]>; + + +multiclass BitwiseOr +{ + def v16i8: ORVecInst<v16i8>; + def v8i16: ORVecInst<v8i16>; + def v4i32: ORVecInst<v4i32>; + def v2i64: ORVecInst<v2i64>; + + def v4f32: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4f32 VECREG:$rT), + (v4f32 (bitconvert (or (v4i32 VECREG:$rA), + (v4i32 VECREG:$rB)))))]>; + + def v2f64: ORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v2f64 VECREG:$rT), + (v2f64 (bitconvert (or (v2i64 VECREG:$rA), + (v2i64 VECREG:$rB)))))]>; + + def r128: ORRegInst<GPRC>; + def r64: ORRegInst<R64C>; + def r32: ORRegInst<R32C>; + def r16: ORRegInst<R16C>; + def r8: ORRegInst<R8C>; + + // OR instructions used to copy f32 and f64 registers. + def f32: ORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [/* no pattern */]>; + + def f64: ORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + [/* no pattern */]>; +} + +defm OR : BitwiseOr; + +//===----------------------------------------------------------------------===// +// SPU::PREFSLOT2VEC and VEC2PREFSLOT re-interpretations of registers +//===----------------------------------------------------------------------===// +def : Pat<(v16i8 (SPUprefslot2vec R8C:$rA)), + (COPY_TO_REGCLASS R8C:$rA, VECREG)>; + +def : Pat<(v8i16 (SPUprefslot2vec R16C:$rA)), + (COPY_TO_REGCLASS R16C:$rA, VECREG)>; + +def : Pat<(v4i32 (SPUprefslot2vec R32C:$rA)), + (COPY_TO_REGCLASS R32C:$rA, VECREG)>; + +def : Pat<(v2i64 (SPUprefslot2vec R64C:$rA)), + (COPY_TO_REGCLASS R64C:$rA, VECREG)>; + +def : Pat<(v4f32 (SPUprefslot2vec R32FP:$rA)), + (COPY_TO_REGCLASS R32FP:$rA, VECREG)>; + +def : Pat<(v2f64 (SPUprefslot2vec R64FP:$rA)), + (COPY_TO_REGCLASS R64FP:$rA, VECREG)>; + +def : Pat<(i8 (SPUvec2prefslot (v16i8 VECREG:$rA))), + (COPY_TO_REGCLASS (v16i8 VECREG:$rA), R8C)>; + +def : Pat<(i16 (SPUvec2prefslot (v8i16 VECREG:$rA))), + (COPY_TO_REGCLASS (v8i16 VECREG:$rA), R16C)>; + +def : Pat<(i32 (SPUvec2prefslot (v4i32 VECREG:$rA))), + (COPY_TO_REGCLASS (v4i32 VECREG:$rA), R32C)>; + +def : Pat<(i64 (SPUvec2prefslot (v2i64 VECREG:$rA))), + (COPY_TO_REGCLASS (v2i64 VECREG:$rA), R64C)>; + +def : Pat<(f32 (SPUvec2prefslot (v4f32 VECREG:$rA))), + (COPY_TO_REGCLASS (v4f32 VECREG:$rA), R32FP)>; + +def : Pat<(f64 (SPUvec2prefslot (v2f64 VECREG:$rA))), + (COPY_TO_REGCLASS (v2f64 VECREG:$rA), R64FP)>; + +// Load Register: This is an assembler alias for a bitwise OR of a register +// against itself. It's here because it brings some clarity to assembly +// language output. 
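+//
+// (The prefslot2vec/vec2prefslot re-interpretations above are pure
+// register-class copies -- a scalar lives in the preferred slot of the same
+// 128-bit register that holds a vector -- so they need no instruction of
+// their own; the LR form below covers the case where an explicit
+// register-to-register copy is still wanted.)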
+ +let hasCtrlDep = 1 in { + class LRInst<dag OOL, dag IOL> + : SPUInstr<OOL, IOL, "lr\t$rT, $rA", IntegerOp> { + bits<7> RA; + bits<7> RT; + + let Pattern = [/*no pattern*/]; + + let Inst{0-10} = 0b10000010000; /* It's an OR operation */ + let Inst{11-17} = RA; + let Inst{18-24} = RA; + let Inst{25-31} = RT; + } + + class LRVecInst<ValueType vectype>: + LRInst<(outs VECREG:$rT), (ins VECREG:$rA)>; + + class LRRegInst<RegisterClass rclass>: + LRInst<(outs rclass:$rT), (ins rclass:$rA)>; + + multiclass LoadRegister { + def v2i64: LRVecInst<v2i64>; + def v2f64: LRVecInst<v2f64>; + def v4i32: LRVecInst<v4i32>; + def v4f32: LRVecInst<v4f32>; + def v8i16: LRVecInst<v8i16>; + def v16i8: LRVecInst<v16i8>; + + def r128: LRRegInst<GPRC>; + def r64: LRRegInst<R64C>; + def f64: LRRegInst<R64FP>; + def r32: LRRegInst<R32C>; + def f32: LRRegInst<R32FP>; + def r16: LRRegInst<R16C>; + def r8: LRRegInst<R8C>; + } + + defm LR: LoadRegister; +} + +// ORC: Bitwise "or" with complement (c = a | ~b) + +class ORCInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10010010000, OOL, IOL, "orc\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class ORCVecInst<ValueType vectype>: + ORCInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + (vnot (vectype VECREG:$rB))))]>; + +class ORCRegInst<RegisterClass rclass>: + ORCInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (or rclass:$rA, (not rclass:$rB)))]>; + +multiclass BitwiseOrComplement +{ + def v16i8: ORCVecInst<v16i8>; + def v8i16: ORCVecInst<v8i16>; + def v4i32: ORCVecInst<v4i32>; + def v2i64: ORCVecInst<v2i64>; + + def r128: ORCRegInst<GPRC>; + def r64: ORCRegInst<R64C>; + def r32: ORCRegInst<R32C>; + def r16: ORCRegInst<R16C>; + def r8: ORCRegInst<R8C>; +} + +defm ORC : BitwiseOrComplement; + +// OR byte immediate +class ORBIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b01100000, OOL, IOL, "orbi\t$rT, $rA, $val", + IntegerOp, pattern>; + +class ORBIVecInst<ValueType vectype, PatLeaf immpred>: + ORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (v16i8 VECREG:$rT), (or (vectype VECREG:$rA), + (vectype immpred:$val)))]>; + +multiclass BitwiseOrByteImm +{ + def v16i8: ORBIVecInst<v16i8, v16i8U8Imm>; + + def r8: ORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + [(set R8C:$rT, (or R8C:$rA, immU8:$val))]>; +} + +defm ORBI : BitwiseOrByteImm; + +// OR halfword immediate +class ORHIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b10100000, OOL, IOL, "orhi\t$rT, $rA, $val", + IntegerOp, pattern>; + +class ORHIVecInst<ValueType vectype, PatLeaf immpred>: + ORHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + immpred:$val))]>; + +multiclass BitwiseOrHalfwordImm +{ + def v8i16: ORHIVecInst<v8i16, v8i16Uns10Imm>; + + def r16: ORHIInst<(outs R16C:$rT), (ins R16C:$rA, u10imm:$val), + [(set R16C:$rT, (or R16C:$rA, i16ImmUns10:$val))]>; + + // Specialized ORHI form used to promote 8-bit registers to 16-bit + def i8i16: ORHIInst<(outs R16C:$rT), (ins R8C:$rA, s10imm:$val), + [(set R16C:$rT, (or (anyext R8C:$rA), + i16ImmSExt10:$val))]>; +} + +defm ORHI : BitwiseOrHalfwordImm; + +class ORIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b00100000, OOL, IOL, "ori\t$rT, $rA, $val", + IntegerOp, pattern>; + +class ORIVecInst<ValueType vectype, PatLeaf immpred>: + ORIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (vectype VECREG:$rT), (or (vectype VECREG:$rA), + immpred:$val))]>; + 
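+// Note on the specialized i8i16 / i16i32 / i8i32 forms above and below: the
+// "anyext" promotion patterns further down the file presumably instantiate
+// them with a zero immediate, so that e.g. "ori $rT, $rA, 0" doubles as a
+// plain widening move between register classes.
+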
+// Bitwise "or" with immediate +multiclass BitwiseOrImm +{ + def v4i32: ORIVecInst<v4i32, v4i32Uns10Imm>; + + def r32: ORIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (or R32C:$rA, i32ImmSExt10:$val))]>; + + // i16i32: hacked version of the ori instruction to extend 16-bit quantities + // to 32-bit quantities. used exclusively to match "anyext" conversions (vide + // infra "anyext 16->32" pattern.) + def i16i32: ORIInst<(outs R32C:$rT), (ins R16C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (or (anyext R16C:$rA), + i32ImmSExt10:$val))]>; + + // i8i32: Hacked version of the ORI instruction to extend 16-bit quantities + // to 32-bit quantities. Used exclusively to match "anyext" conversions (vide + // infra "anyext 16->32" pattern.) + def i8i32: ORIInst<(outs R32C:$rT), (ins R8C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (or (anyext R8C:$rA), + i32ImmSExt10:$val))]>; +} + +defm ORI : BitwiseOrImm; + +// ORX: "or" across the vector: or's $rA's word slots leaving the result in +// $rT[0], slots 1-3 are zeroed. +// +// FIXME: Needs to match an intrinsic pattern. +def ORXv4i32: + RRForm<0b10010010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "orx\t$rT, $rA, $rB", IntegerOp, + []>; + +// XOR: + +class XORInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b10010010000, OOL, IOL, "xor\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class XORVecInst<ValueType vectype>: + XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (xor (vectype VECREG:$rA), + (vectype VECREG:$rB)))]>; + +class XORRegInst<RegisterClass rclass>: + XORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (xor rclass:$rA, rclass:$rB))]>; + +multiclass BitwiseExclusiveOr +{ + def v16i8: XORVecInst<v16i8>; + def v8i16: XORVecInst<v8i16>; + def v4i32: XORVecInst<v4i32>; + def v2i64: XORVecInst<v2i64>; + + def r128: XORRegInst<GPRC>; + def r64: XORRegInst<R64C>; + def r32: XORRegInst<R32C>; + def r16: XORRegInst<R16C>; + def r8: XORRegInst<R8C>; + + // XOR instructions used to negate f32 and f64 quantities. 
+ + def fneg32: XORInst<(outs R32FP:$rT), (ins R32FP:$rA, R32C:$rB), + [/* no pattern */]>; + + def fneg64: XORInst<(outs R64FP:$rT), (ins R64FP:$rA, R64C:$rB), + [/* no pattern */]>; + + def fnegvec: XORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* no pattern, see fneg{32,64} */]>; +} + +defm XOR : BitwiseExclusiveOr; + +//==---------------------------------------------------------- + +class XORBIInst<dag OOL, dag IOL, list<dag> pattern>: + RI10Form<0b01100000, OOL, IOL, "xorbi\t$rT, $rA, $val", + IntegerOp, pattern>; + +multiclass XorByteImm +{ + def v16i8: + XORBIInst<(outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + [(set (v16i8 VECREG:$rT), (xor (v16i8 VECREG:$rA), v16i8U8Imm:$val))]>; + + def r8: + XORBIInst<(outs R8C:$rT), (ins R8C:$rA, u10imm_i8:$val), + [(set R8C:$rT, (xor R8C:$rA, immU8:$val))]>; +} + +defm XORBI : XorByteImm; + +def XORHIv8i16: + RI10Form<0b10100000, (outs VECREG:$rT), (ins VECREG:$rA, u10imm:$val), + "xorhi\t$rT, $rA, $val", IntegerOp, + [(set (v8i16 VECREG:$rT), (xor (v8i16 VECREG:$rA), + v8i16SExt10Imm:$val))]>; + +def XORHIr16: + RI10Form<0b10100000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + "xorhi\t$rT, $rA, $val", IntegerOp, + [(set R16C:$rT, (xor R16C:$rA, i16ImmSExt10:$val))]>; + +def XORIv4i32: + RI10Form<0b00100000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm_i32:$val), + "xori\t$rT, $rA, $val", IntegerOp, + [(set (v4i32 VECREG:$rT), (xor (v4i32 VECREG:$rA), + v4i32SExt10Imm:$val))]>; + +def XORIr32: + RI10Form<0b00100000, (outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + "xori\t$rT, $rA, $val", IntegerOp, + [(set R32C:$rT, (xor R32C:$rA, i32ImmSExt10:$val))]>; + +// NAND: + +class NANDInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10010011000, OOL, IOL, "nand\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class NANDVecInst<ValueType vectype>: + NANDInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (vnot (and (vectype VECREG:$rA), + (vectype VECREG:$rB))))]>; +class NANDRegInst<RegisterClass rclass>: + NANDInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (not (and rclass:$rA, rclass:$rB)))]>; + +multiclass BitwiseNand +{ + def v16i8: NANDVecInst<v16i8>; + def v8i16: NANDVecInst<v8i16>; + def v4i32: NANDVecInst<v4i32>; + def v2i64: NANDVecInst<v2i64>; + + def r128: NANDRegInst<GPRC>; + def r64: NANDRegInst<R64C>; + def r32: NANDRegInst<R32C>; + def r16: NANDRegInst<R16C>; + def r8: NANDRegInst<R8C>; +} + +defm NAND : BitwiseNand; + +// NOR: + +class NORInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10010010000, OOL, IOL, "nor\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class NORVecInst<ValueType vectype>: + NORInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), (vnot (or (vectype VECREG:$rA), + (vectype VECREG:$rB))))]>; +class NORRegInst<RegisterClass rclass>: + NORInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (not (or rclass:$rA, rclass:$rB)))]>; + +multiclass BitwiseNor +{ + def v16i8: NORVecInst<v16i8>; + def v8i16: NORVecInst<v8i16>; + def v4i32: NORVecInst<v4i32>; + def v2i64: NORVecInst<v2i64>; + + def r128: NORRegInst<GPRC>; + def r64: NORRegInst<R64C>; + def r32: NORRegInst<R32C>; + def r16: NORRegInst<R16C>; + def r8: NORRegInst<R8C>; +} + +defm NOR : BitwiseNor; + +// Select bits: +class SELBInst<dag OOL, dag IOL, list<dag> pattern>: + RRRForm<0b1000, OOL, IOL, "selb\t$rT, $rA, $rB, $rC", + IntegerOp, pattern>; + +class SELBVecInst<ValueType vectype, PatFrag vnot_frag = vnot>: + SELBInst<(outs 
VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (vectype VECREG:$rT), + (or (and (vectype VECREG:$rC), (vectype VECREG:$rB)), + (and (vnot_frag (vectype VECREG:$rC)), + (vectype VECREG:$rA))))]>; + +class SELBVecVCondInst<ValueType vectype>: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (vectype VECREG:$rT), + (select (vectype VECREG:$rC), + (vectype VECREG:$rB), + (vectype VECREG:$rA)))]>; + +class SELBVecCondInst<ValueType vectype>: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, R32C:$rC), + [(set (vectype VECREG:$rT), + (select R32C:$rC, + (vectype VECREG:$rB), + (vectype VECREG:$rA)))]>; + +class SELBRegInst<RegisterClass rclass>: + SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rclass:$rC), + [(set rclass:$rT, + (or (and rclass:$rB, rclass:$rC), + (and rclass:$rA, (not rclass:$rC))))]>; + +class SELBRegCondInst<RegisterClass rcond, RegisterClass rclass>: + SELBInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB, rcond:$rC), + [(set rclass:$rT, + (select rcond:$rC, rclass:$rB, rclass:$rA))]>; + +multiclass SelectBits +{ + def v16i8: SELBVecInst<v16i8>; + def v8i16: SELBVecInst<v8i16>; + def v4i32: SELBVecInst<v4i32>; + def v2i64: SELBVecInst<v2i64, vnot_cell_conv>; + + def r128: SELBRegInst<GPRC>; + def r64: SELBRegInst<R64C>; + def r32: SELBRegInst<R32C>; + def r16: SELBRegInst<R16C>; + def r8: SELBRegInst<R8C>; + + def v16i8_cond: SELBVecCondInst<v16i8>; + def v8i16_cond: SELBVecCondInst<v8i16>; + def v4i32_cond: SELBVecCondInst<v4i32>; + def v2i64_cond: SELBVecCondInst<v2i64>; + + def v16i8_vcond: SELBVecCondInst<v16i8>; + def v8i16_vcond: SELBVecCondInst<v8i16>; + def v4i32_vcond: SELBVecCondInst<v4i32>; + def v2i64_vcond: SELBVecCondInst<v2i64>; + + def v4f32_cond: + SELBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (v4f32 VECREG:$rT), + (select (v4i32 VECREG:$rC), + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rA)))]>; + + // SELBr64_cond is defined in SPU64InstrInfo.td + def r32_cond: SELBRegCondInst<R32C, R32C>; + def f32_cond: SELBRegCondInst<R32C, R32FP>; + def r16_cond: SELBRegCondInst<R16C, R16C>; + def r8_cond: SELBRegCondInst<R8C, R8C>; +} + +defm SELB : SelectBits; + +class SPUselbPatVec<ValueType vectype, SPUInstr inst>: + Pat<(SPUselb (vectype VECREG:$rA), (vectype VECREG:$rB), (vectype VECREG:$rC)), + (inst VECREG:$rA, VECREG:$rB, VECREG:$rC)>; + +def : SPUselbPatVec<v16i8, SELBv16i8>; +def : SPUselbPatVec<v8i16, SELBv8i16>; +def : SPUselbPatVec<v4i32, SELBv4i32>; +def : SPUselbPatVec<v2i64, SELBv2i64>; + +class SPUselbPatReg<RegisterClass rclass, SPUInstr inst>: + Pat<(SPUselb rclass:$rA, rclass:$rB, rclass:$rC), + (inst rclass:$rA, rclass:$rB, rclass:$rC)>; + +def : SPUselbPatReg<R8C, SELBr8>; +def : SPUselbPatReg<R16C, SELBr16>; +def : SPUselbPatReg<R32C, SELBr32>; +def : SPUselbPatReg<R64C, SELBr64>; + +// EQV: Equivalence (1 for each same bit, otherwise 0) +// +// Note: There are a lot of ways to match this bit operator and these patterns +// attempt to be as exhaustive as possible. 
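+//
+// All of the variants below compute the same bitwise XNOR:
+//
+//   t = ~(a ^ b)  ==  (a & b) | (~a & ~b)  ==  a ^ ~b
+//
+// the separate pattern classes exist only because the DAG combiner may hand
+// the selector any one of these equivalent forms.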
+ +class EQVInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10010010000, OOL, IOL, "eqv\t$rT, $rA, $rB", + IntegerOp, pattern>; + +class EQVVecInst<ValueType vectype>: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), + (and (vnot (vectype VECREG:$rA)), + (vnot (vectype VECREG:$rB)))))]>; + +class EQVRegInst<RegisterClass rclass>: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (or (and rclass:$rA, rclass:$rB), + (and (not rclass:$rA), (not rclass:$rB))))]>; + +class EQVVecPattern1<ValueType vectype>: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (xor (vectype VECREG:$rA), (vnot (vectype VECREG:$rB))))]>; + +class EQVRegPattern1<RegisterClass rclass>: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (xor rclass:$rA, (not rclass:$rB)))]>; + +class EQVVecPattern2<ValueType vectype>: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (or (and (vectype VECREG:$rA), (vectype VECREG:$rB)), + (vnot (or (vectype VECREG:$rA), (vectype VECREG:$rB)))))]>; + +class EQVRegPattern2<RegisterClass rclass>: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, + (or (and rclass:$rA, rclass:$rB), + (not (or rclass:$rA, rclass:$rB))))]>; + +class EQVVecPattern3<ValueType vectype>: + EQVInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (not (xor (vectype VECREG:$rA), (vectype VECREG:$rB))))]>; + +class EQVRegPattern3<RegisterClass rclass>: + EQVInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (not (xor rclass:$rA, rclass:$rB)))]>; + +multiclass BitEquivalence +{ + def v16i8: EQVVecInst<v16i8>; + def v8i16: EQVVecInst<v8i16>; + def v4i32: EQVVecInst<v4i32>; + def v2i64: EQVVecInst<v2i64>; + + def v16i8_1: EQVVecPattern1<v16i8>; + def v8i16_1: EQVVecPattern1<v8i16>; + def v4i32_1: EQVVecPattern1<v4i32>; + def v2i64_1: EQVVecPattern1<v2i64>; + + def v16i8_2: EQVVecPattern2<v16i8>; + def v8i16_2: EQVVecPattern2<v8i16>; + def v4i32_2: EQVVecPattern2<v4i32>; + def v2i64_2: EQVVecPattern2<v2i64>; + + def v16i8_3: EQVVecPattern3<v16i8>; + def v8i16_3: EQVVecPattern3<v8i16>; + def v4i32_3: EQVVecPattern3<v4i32>; + def v2i64_3: EQVVecPattern3<v2i64>; + + def r128: EQVRegInst<GPRC>; + def r64: EQVRegInst<R64C>; + def r32: EQVRegInst<R32C>; + def r16: EQVRegInst<R16C>; + def r8: EQVRegInst<R8C>; + + def r128_1: EQVRegPattern1<GPRC>; + def r64_1: EQVRegPattern1<R64C>; + def r32_1: EQVRegPattern1<R32C>; + def r16_1: EQVRegPattern1<R16C>; + def r8_1: EQVRegPattern1<R8C>; + + def r128_2: EQVRegPattern2<GPRC>; + def r64_2: EQVRegPattern2<R64C>; + def r32_2: EQVRegPattern2<R32C>; + def r16_2: EQVRegPattern2<R16C>; + def r8_2: EQVRegPattern2<R8C>; + + def r128_3: EQVRegPattern3<GPRC>; + def r64_3: EQVRegPattern3<R64C>; + def r32_3: EQVRegPattern3<R32C>; + def r16_3: EQVRegPattern3<R16C>; + def r8_3: EQVRegPattern3<R8C>; +} + +defm EQV: BitEquivalence; + +//===----------------------------------------------------------------------===// +// Vector shuffle... +//===----------------------------------------------------------------------===// +// SPUshuffle is generated in LowerVECTOR_SHUFFLE and gets replaced with SHUFB. +// See the SPUshuffle SDNode operand above, which sets up the DAG pattern +// matcher to emit something when the LowerVECTOR_SHUFFLE generates a node with +// the SPUISD::SHUFB opcode. 
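+//
+// As a reminder of the underlying semantics (SPU ISA): each control byte c
+// of $rC selects one result byte --
+//   0b10xxxxxx -> 0x00,  0b110xxxxx -> 0xFF,  0b111xxxxx -> 0x80,
+//   otherwise the low five bits of c index the 32 bytes of $rA:$rB
+//     ($rA supplies bytes 0-15, $rB bytes 16-31).
+// The mask is therefore byte-granular, which is why both v16i8 and v4i32
+// mask types below map onto the same instruction.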
+//===----------------------------------------------------------------------===// + +class SHUFBInst<dag OOL, dag IOL, list<dag> pattern>: + RRRForm<0b1000, OOL, IOL, "shufb\t$rT, $rA, $rB, $rC", + ShuffleOp, pattern>; + +class SHUFBVecInst<ValueType resultvec, ValueType maskvec>: + SHUFBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + [(set (resultvec VECREG:$rT), + (SPUshuffle (resultvec VECREG:$rA), + (resultvec VECREG:$rB), + (maskvec VECREG:$rC)))]>; + +class SHUFBGPRCInst: + SHUFBInst<(outs VECREG:$rT), (ins GPRC:$rA, GPRC:$rB, VECREG:$rC), + [/* no pattern */]>; + +multiclass ShuffleBytes +{ + def v16i8 : SHUFBVecInst<v16i8, v16i8>; + def v16i8_m32 : SHUFBVecInst<v16i8, v4i32>; + def v8i16 : SHUFBVecInst<v8i16, v16i8>; + def v8i16_m32 : SHUFBVecInst<v8i16, v4i32>; + def v4i32 : SHUFBVecInst<v4i32, v16i8>; + def v4i32_m32 : SHUFBVecInst<v4i32, v4i32>; + def v2i64 : SHUFBVecInst<v2i64, v16i8>; + def v2i64_m32 : SHUFBVecInst<v2i64, v4i32>; + + def v4f32 : SHUFBVecInst<v4f32, v16i8>; + def v4f32_m32 : SHUFBVecInst<v4f32, v4i32>; + + def v2f64 : SHUFBVecInst<v2f64, v16i8>; + def v2f64_m32 : SHUFBVecInst<v2f64, v4i32>; + + def gprc : SHUFBGPRCInst; +} + +defm SHUFB : ShuffleBytes; + +//===----------------------------------------------------------------------===// +// Shift and rotate group: +//===----------------------------------------------------------------------===// + +class SHLHInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b11111010000, OOL, IOL, "shlh\t$rT, $rA, $rB", + RotShiftVec, pattern>; + +class SHLHVecInst<ValueType vectype>: + SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + +multiclass ShiftLeftHalfword +{ + def v8i16: SHLHVecInst<v8i16>; + def r16: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (shl R16C:$rA, R16C:$rB))]>; + def r16_r32: SHLHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + [(set R16C:$rT, (shl R16C:$rA, R32C:$rB))]>; +} + +defm SHLH : ShiftLeftHalfword; + +//===----------------------------------------------------------------------===// + +class SHLHIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b11111010000, OOL, IOL, "shlhi\t$rT, $rA, $val", + RotShiftVec, pattern>; + +class SHLHIVecInst<ValueType vectype>: + SHLHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), + [(set (vectype VECREG:$rT), + (SPUvec_shl (vectype VECREG:$rA), (i16 uimm7:$val)))]>; + +multiclass ShiftLeftHalfwordImm +{ + def v8i16: SHLHIVecInst<v8i16>; + def r16: SHLHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), + [(set R16C:$rT, (shl R16C:$rA, (i16 uimm7:$val)))]>; +} + +defm SHLHI : ShiftLeftHalfwordImm; + +def : Pat<(SPUvec_shl (v8i16 VECREG:$rA), (i32 uimm7:$val)), + (SHLHIv8i16 VECREG:$rA, (TO_IMM16 uimm7:$val))>; + +def : Pat<(shl R16C:$rA, (i32 uimm7:$val)), + (SHLHIr16 R16C:$rA, (TO_IMM16 uimm7:$val))>; + +//===----------------------------------------------------------------------===// + +class SHLInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b11111010000, OOL, IOL, "shl\t$rT, $rA, $rB", + RotShiftVec, pattern>; + +multiclass ShiftLeftWord +{ + def v4i32: + SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + def r32: + SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; +} + +defm SHL: ShiftLeftWord; + 
+//===----------------------------------------------------------------------===// + +class SHLIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b11111010000, OOL, IOL, "shli\t$rT, $rA, $val", + RotShiftVec, pattern>; + +multiclass ShiftLeftWordImm +{ + def v4i32: + SHLIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), + [(set (v4i32 VECREG:$rT), + (SPUvec_shl (v4i32 VECREG:$rA), (i32 uimm7:$val)))]>; + + def r32: + SHLIInst<(outs R32C:$rT), (ins R32C:$rA, u7imm_i32:$val), + [(set R32C:$rT, (shl R32C:$rA, (i32 uimm7:$val)))]>; +} + +defm SHLI : ShiftLeftWordImm; + +//===----------------------------------------------------------------------===// +// SHLQBI vec form: Note that this will shift the entire vector (the 128-bit +// register) to the left. Vector form is here to ensure type correctness. +// +// The shift count is in the lowest 3 bits (29-31) of $rB, so only a bit shift +// of 7 bits is actually possible. +// +// Note also that SHLQBI/SHLQBII are used in conjunction with SHLQBY/SHLQBYI +// to shift i64 and i128. SHLQBI is the residual left over after shifting by +// bytes with SHLQBY. + +class SHLQBIInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b11011011100, OOL, IOL, "shlqbi\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class SHLQBIVecInst<ValueType vectype>: + SHLQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [(set (vectype VECREG:$rT), + (SPUshlquad_l_bits (vectype VECREG:$rA), R32C:$rB))]>; + +class SHLQBIRegInst<RegisterClass rclass>: + SHLQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass ShiftLeftQuadByBits +{ + def v16i8: SHLQBIVecInst<v16i8>; + def v8i16: SHLQBIVecInst<v8i16>; + def v4i32: SHLQBIVecInst<v4i32>; + def v4f32: SHLQBIVecInst<v4f32>; + def v2i64: SHLQBIVecInst<v2i64>; + def v2f64: SHLQBIVecInst<v2f64>; + + def r128: SHLQBIRegInst<GPRC>; +} + +defm SHLQBI : ShiftLeftQuadByBits; + +// See note above on SHLQBI. In this case, the predicate actually does then +// enforcement, whereas with SHLQBI, we have to "take it on faith." +class SHLQBIIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b11011111100, OOL, IOL, "shlqbii\t$rT, $rA, $val", + RotShiftQuad, pattern>; + +class SHLQBIIVecInst<ValueType vectype>: + SHLQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), + [(set (vectype VECREG:$rT), + (SPUshlquad_l_bits (vectype VECREG:$rA), (i32 bitshift:$val)))]>; + +multiclass ShiftLeftQuadByBitsImm +{ + def v16i8 : SHLQBIIVecInst<v16i8>; + def v8i16 : SHLQBIIVecInst<v8i16>; + def v4i32 : SHLQBIIVecInst<v4i32>; + def v4f32 : SHLQBIIVecInst<v4f32>; + def v2i64 : SHLQBIIVecInst<v2i64>; + def v2f64 : SHLQBIIVecInst<v2f64>; +} + +defm SHLQBII : ShiftLeftQuadByBitsImm; + +// SHLQBY, SHLQBYI vector forms: Shift the entire vector to the left by bytes, +// not by bits. See notes above on SHLQBI. 
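+//
+// Illustrative idiom (not matched by any pattern here): a full 128-bit left
+// shift by a variable bit count n composes the byte and bit forms, e.g.
+//
+//   shlqbybi  $t, $rA, $n     ; shift by n/8 bytes (low three bits ignored)
+//   shlqbi    $rT, $t, $n     ; shift by the remaining n%8 bits
+//
+// the srl selection pattern further below does the same thing with
+// ROTQMBYBI/ROTQMBI and a negated count.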
+ +class SHLQBYInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b11111011100, OOL, IOL, "shlqby\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class SHLQBYVecInst<ValueType vectype>: + SHLQBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [(set (vectype VECREG:$rT), + (SPUshlquad_l_bytes (vectype VECREG:$rA), R32C:$rB))]>; + +multiclass ShiftLeftQuadBytes +{ + def v16i8: SHLQBYVecInst<v16i8>; + def v8i16: SHLQBYVecInst<v8i16>; + def v4i32: SHLQBYVecInst<v4i32>; + def v4f32: SHLQBYVecInst<v4f32>; + def v2i64: SHLQBYVecInst<v2i64>; + def v2f64: SHLQBYVecInst<v2f64>; + def r128: SHLQBYInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), + [(set GPRC:$rT, (SPUshlquad_l_bytes GPRC:$rA, R32C:$rB))]>; +} + +defm SHLQBY: ShiftLeftQuadBytes; + +class SHLQBYIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b11111111100, OOL, IOL, "shlqbyi\t$rT, $rA, $val", + RotShiftQuad, pattern>; + +class SHLQBYIVecInst<ValueType vectype>: + SHLQBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm_i32:$val), + [(set (vectype VECREG:$rT), + (SPUshlquad_l_bytes (vectype VECREG:$rA), (i32 uimm7:$val)))]>; + +multiclass ShiftLeftQuadBytesImm +{ + def v16i8: SHLQBYIVecInst<v16i8>; + def v8i16: SHLQBYIVecInst<v8i16>; + def v4i32: SHLQBYIVecInst<v4i32>; + def v4f32: SHLQBYIVecInst<v4f32>; + def v2i64: SHLQBYIVecInst<v2i64>; + def v2f64: SHLQBYIVecInst<v2f64>; + def r128: SHLQBYIInst<(outs GPRC:$rT), (ins GPRC:$rA, u7imm_i32:$val), + [(set GPRC:$rT, + (SPUshlquad_l_bytes GPRC:$rA, (i32 uimm7:$val)))]>; +} + +defm SHLQBYI : ShiftLeftQuadBytesImm; + +class SHLQBYBIInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00111001111, OOL, IOL, "shlqbybi\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class SHLQBYBIVecInst<ValueType vectype>: + SHLQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern */]>; + +class SHLQBYBIRegInst<RegisterClass rclass>: + SHLQBYBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass ShiftLeftQuadBytesBitCount +{ + def v16i8: SHLQBYBIVecInst<v16i8>; + def v8i16: SHLQBYBIVecInst<v8i16>; + def v4i32: SHLQBYBIVecInst<v4i32>; + def v4f32: SHLQBYBIVecInst<v4f32>; + def v2i64: SHLQBYBIVecInst<v2i64>; + def v2f64: SHLQBYBIVecInst<v2f64>; + + def r128: SHLQBYBIRegInst<GPRC>; +} + +defm SHLQBYBI : ShiftLeftQuadBytesBitCount; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate halfword: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +class ROTHInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00111010000, OOL, IOL, "roth\t$rT, $rA, $rB", + RotShiftVec, pattern>; + +class ROTHVecInst<ValueType vectype>: + ROTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (SPUvec_rotl VECREG:$rA, (v8i16 VECREG:$rB)))]>; + +class ROTHRegInst<RegisterClass rclass>: + ROTHInst<(outs rclass:$rT), (ins rclass:$rA, rclass:$rB), + [(set rclass:$rT, (rotl rclass:$rA, rclass:$rB))]>; + +multiclass RotateLeftHalfword +{ + def v8i16: ROTHVecInst<v8i16>; + def r16: ROTHRegInst<R16C>; +} + +defm ROTH: RotateLeftHalfword; + +def ROTHr16_r32: ROTHInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + [(set R16C:$rT, (rotl R16C:$rA, R32C:$rB))]>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate halfword, immediate: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +class ROTHIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b00111110000, OOL, IOL, "rothi\t$rT, $rA, $val", + RotShiftVec, pattern>; + +class 
ROTHIVecInst<ValueType vectype>: + ROTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, u7imm:$val), + [(set (vectype VECREG:$rT), + (SPUvec_rotl VECREG:$rA, (i16 uimm7:$val)))]>; + +multiclass RotateLeftHalfwordImm +{ + def v8i16: ROTHIVecInst<v8i16>; + def r16: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm:$val), + [(set R16C:$rT, (rotl R16C:$rA, (i16 uimm7:$val)))]>; + def r16_r32: ROTHIInst<(outs R16C:$rT), (ins R16C:$rA, u7imm_i32:$val), + [(set R16C:$rT, (rotl R16C:$rA, (i32 uimm7:$val)))]>; +} + +defm ROTHI: RotateLeftHalfwordImm; + +def : Pat<(SPUvec_rotl (v8i16 VECREG:$rA), (i32 uimm7:$val)), + (ROTHIv8i16 VECREG:$rA, (TO_IMM16 imm:$val))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate word: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00011010000, OOL, IOL, "rot\t$rT, $rA, $rB", + RotShiftVec, pattern>; + +class ROTVecInst<ValueType vectype>: + ROTInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [(set (vectype VECREG:$rT), + (SPUvec_rotl (vectype VECREG:$rA), R32C:$rB))]>; + +class ROTRegInst<RegisterClass rclass>: + ROTInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [(set rclass:$rT, + (rotl rclass:$rA, R32C:$rB))]>; + +multiclass RotateLeftWord +{ + def v4i32: ROTVecInst<v4i32>; + def r32: ROTRegInst<R32C>; +} + +defm ROT: RotateLeftWord; + +// The rotate amount is in the same bits whether we've got an 8-bit, 16-bit or +// 32-bit register +def ROTr32_r16_anyext: + ROTInst<(outs R32C:$rT), (ins R32C:$rA, R16C:$rB), + [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R16C:$rB))))]>; + +def : Pat<(rotl R32C:$rA, (i32 (zext R16C:$rB))), + (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; + +def : Pat<(rotl R32C:$rA, (i32 (sext R16C:$rB))), + (ROTr32_r16_anyext R32C:$rA, R16C:$rB)>; + +def ROTr32_r8_anyext: + ROTInst<(outs R32C:$rT), (ins R32C:$rA, R8C:$rB), + [(set R32C:$rT, (rotl R32C:$rA, (i32 (anyext R8C:$rB))))]>; + +def : Pat<(rotl R32C:$rA, (i32 (zext R8C:$rB))), + (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; + +def : Pat<(rotl R32C:$rA, (i32 (sext R8C:$rB))), + (ROTr32_r8_anyext R32C:$rA, R8C:$rB)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate word, immediate +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b00011110000, OOL, IOL, "roti\t$rT, $rA, $val", + RotShiftVec, pattern>; + +class ROTIVecInst<ValueType vectype, Operand optype, ValueType inttype, PatLeaf pred>: + ROTIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), + [(set (vectype VECREG:$rT), + (SPUvec_rotl (vectype VECREG:$rA), (inttype pred:$val)))]>; + +class ROTIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>: + ROTIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), + [(set rclass:$rT, (rotl rclass:$rA, (inttype pred:$val)))]>; + +multiclass RotateLeftWordImm +{ + def v4i32: ROTIVecInst<v4i32, u7imm_i32, i32, uimm7>; + def v4i32_i16: ROTIVecInst<v4i32, u7imm, i16, uimm7>; + def v4i32_i8: ROTIVecInst<v4i32, u7imm_i8, i8, uimm7>; + + def r32: ROTIRegInst<R32C, u7imm_i32, i32, uimm7>; + def r32_i16: ROTIRegInst<R32C, u7imm, i16, uimm7>; + def r32_i8: ROTIRegInst<R32C, u7imm_i8, i8, uimm7>; +} + +defm ROTI : RotateLeftWordImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad by byte (count) +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class 
ROTQBYInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00111011100, OOL, IOL, "rotqby\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class ROTQBYGenInst<ValueType type, RegisterClass rc>: + ROTQBYInst<(outs rc:$rT), (ins rc:$rA, R32C:$rB), + [(set (type rc:$rT), + (SPUrotbytes_left (type rc:$rA), R32C:$rB))]>; + +class ROTQBYVecInst<ValueType type>: + ROTQBYGenInst<type, VECREG>; + +multiclass RotateQuadLeftByBytes +{ + def v16i8: ROTQBYVecInst<v16i8>; + def v8i16: ROTQBYVecInst<v8i16>; + def v4i32: ROTQBYVecInst<v4i32>; + def v4f32: ROTQBYVecInst<v4f32>; + def v2i64: ROTQBYVecInst<v2i64>; + def v2f64: ROTQBYVecInst<v2f64>; + def i128: ROTQBYGenInst<i128, GPRC>; +} + +defm ROTQBY: RotateQuadLeftByBytes; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad by byte (count), immediate +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQBYIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b00111111100, OOL, IOL, "rotqbyi\t$rT, $rA, $val", + RotShiftQuad, pattern>; + +class ROTQBYIGenInst<ValueType type, RegisterClass rclass>: + ROTQBYIInst<(outs rclass:$rT), (ins rclass:$rA, u7imm:$val), + [(set (type rclass:$rT), + (SPUrotbytes_left (type rclass:$rA), (i16 uimm7:$val)))]>; + +class ROTQBYIVecInst<ValueType vectype>: + ROTQBYIGenInst<vectype, VECREG>; + +multiclass RotateQuadByBytesImm +{ + def v16i8: ROTQBYIVecInst<v16i8>; + def v8i16: ROTQBYIVecInst<v8i16>; + def v4i32: ROTQBYIVecInst<v4i32>; + def v4f32: ROTQBYIVecInst<v4f32>; + def v2i64: ROTQBYIVecInst<v2i64>; + def vfi64: ROTQBYIVecInst<v2f64>; + def i128: ROTQBYIGenInst<i128, GPRC>; +} + +defm ROTQBYI: RotateQuadByBytesImm; + +// See ROTQBY note above. +class ROTQBYBIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b00110011100, OOL, IOL, + "rotqbybi\t$rT, $rA, $shift", + RotShiftQuad, pattern>; + +class ROTQBYBIVecInst<ValueType vectype, RegisterClass rclass>: + ROTQBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, rclass:$shift), + [(set (vectype VECREG:$rT), + (SPUrotbytes_left_bits (vectype VECREG:$rA), rclass:$shift))]>; + +multiclass RotateQuadByBytesByBitshift { + def v16i8_r32: ROTQBYBIVecInst<v16i8, R32C>; + def v8i16_r32: ROTQBYBIVecInst<v8i16, R32C>; + def v4i32_r32: ROTQBYBIVecInst<v4i32, R32C>; + def v2i64_r32: ROTQBYBIVecInst<v2i64, R32C>; +} + +defm ROTQBYBI : RotateQuadByBytesByBitshift; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// See ROTQBY note above. 
+// +// Assume that the user of this instruction knows to shift the rotate count +// into bit 29 +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQBIInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b00011011100, OOL, IOL, "rotqbi\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class ROTQBIVecInst<ValueType vectype>: + ROTQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern yet */]>; + +class ROTQBIRegInst<RegisterClass rclass>: + ROTQBIInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern yet */]>; + +multiclass RotateQuadByBitCount +{ + def v16i8: ROTQBIVecInst<v16i8>; + def v8i16: ROTQBIVecInst<v8i16>; + def v4i32: ROTQBIVecInst<v4i32>; + def v2i64: ROTQBIVecInst<v2i64>; + + def r128: ROTQBIRegInst<GPRC>; + def r64: ROTQBIRegInst<R64C>; +} + +defm ROTQBI: RotateQuadByBitCount; + +class ROTQBIIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b00011111100, OOL, IOL, "rotqbii\t$rT, $rA, $val", + RotShiftQuad, pattern>; + +class ROTQBIIVecInst<ValueType vectype, Operand optype, ValueType inttype, + PatLeaf pred>: + ROTQBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, optype:$val), + [/* no pattern yet */]>; + +class ROTQBIIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, + PatLeaf pred>: + ROTQBIIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), + [/* no pattern yet */]>; + +multiclass RotateQuadByBitCountImm +{ + def v16i8: ROTQBIIVecInst<v16i8, u7imm_i32, i32, uimm7>; + def v8i16: ROTQBIIVecInst<v8i16, u7imm_i32, i32, uimm7>; + def v4i32: ROTQBIIVecInst<v4i32, u7imm_i32, i32, uimm7>; + def v2i64: ROTQBIIVecInst<v2i64, u7imm_i32, i32, uimm7>; + + def r128: ROTQBIIRegInst<GPRC, u7imm_i32, i32, uimm7>; + def r64: ROTQBIIRegInst<R64C, u7imm_i32, i32, uimm7>; +} + +defm ROTQBII : RotateQuadByBitCountImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// ROTHM v8i16 form: +// NOTE(1): No vector rotate is generated by the C/C++ frontend (today), +// so this only matches a synthetically generated/lowered code +// fragment. +// NOTE(2): $rB must be negated before the right rotate! +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTHMInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10111010000, OOL, IOL, "rothm\t$rT, $rA, $rB", + RotShiftVec, pattern>; + +def ROTHMv8i16: + ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; + +// ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left +// Note: This instruction doesn't match a pattern because rB must be negated +// for the instruction to work. Thus, the pattern below the instruction! + +def ROTHMr16: + ROTHMInst<(outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + [/* see patterns below - $rB must be negated! */]>; + +def : Pat<(srl R16C:$rA, R32C:$rB), + (ROTHMr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(srl R16C:$rA, R16C:$rB), + (ROTHMr16 R16C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(srl R16C:$rA, R8C:$rB), + (ROTHMr16 R16C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; + +// ROTHMI v8i16 form: See the comment for ROTHM v8i16. The difference here is +// that the immediate can be complemented, so that the user doesn't have to +// worry about it. 
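+//
+// For reference, the negate-then-rotate selection above means that a plain
+// (srl R16C:$rA, R32C:$rB) ends up as the two-instruction sequence
+//
+//   sfi    $t, $rB, 0       ; $t = 0 - $rB
+//   rothm  $rT, $rA, $t     ; right shift halfword by $rB, zero fill
+//
+// which is why the ROTHM/ROTM defs themselves carry no DAG patterns.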
+ +class ROTHMIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b10111110000, OOL, IOL, "rothmi\t$rT, $rA, $val", + RotShiftVec, pattern>; + +def ROTHMIv8i16: + ROTHMIInst<(outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), + [/* no pattern */]>; + +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i32 imm:$val)), + (ROTHMIv8i16 VECREG:$rA, imm:$val)>; + +def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i16 imm:$val)), + (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>; + +def: Pat<(SPUvec_srl (v8i16 VECREG:$rA), (i8 imm:$val)), + (ROTHMIv8i16 VECREG:$rA, (TO_IMM32 imm:$val))>; + +def ROTHMIr16: + ROTHMIInst<(outs R16C:$rT), (ins R16C:$rA, rothNeg7imm:$val), + [/* no pattern */]>; + +def: Pat<(srl R16C:$rA, (i32 uimm7:$val)), + (ROTHMIr16 R16C:$rA, uimm7:$val)>; + +def: Pat<(srl R16C:$rA, (i16 uimm7:$val)), + (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; + +def: Pat<(srl R16C:$rA, (i8 uimm7:$val)), + (ROTHMIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; + +// ROTM v4i32 form: See the ROTHM v8i16 comments. +class ROTMInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10011010000, OOL, IOL, "rotm\t$rT, $rA, $rB", + RotShiftVec, pattern>; + +def ROTMv4i32: + ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>; + +def ROTMr32: + ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(srl R32C:$rA, R32C:$rB), + (ROTMr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(srl R32C:$rA, R16C:$rB), + (ROTMr32 R32C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(srl R32C:$rA, R8C:$rB), + (ROTMr32 R32C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +// ROTMI v4i32 form: See the comment for ROTHM v8i16. +def ROTMIv4i32: + RI7Form<0b10011110000, (outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), + "rotmi\t$rT, $rA, $val", RotShiftVec, + [(set (v4i32 VECREG:$rT), + (SPUvec_srl VECREG:$rA, (i32 uimm7:$val)))]>; + +def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i16 uimm7:$val)), + (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>; + +def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (i8 uimm7:$val)), + (ROTMIv4i32 VECREG:$rA, (TO_IMM32 uimm7:$val))>; + +// ROTMI r32 form: know how to complement the immediate value. +def ROTMIr32: + RI7Form<0b10011110000, (outs R32C:$rT), (ins R32C:$rA, rotNeg7imm:$val), + "rotmi\t$rT, $rA, $val", RotShiftVec, + [(set R32C:$rT, (srl R32C:$rA, (i32 uimm7:$val)))]>; + +def : Pat<(srl R32C:$rA, (i16 imm:$val)), + (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>; + +def : Pat<(srl R32C:$rA, (i8 imm:$val)), + (ROTMIr32 R32C:$rA, (TO_IMM32 uimm7:$val))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// ROTQMBY: This is a vector form merely so that when used in an +// instruction pattern, type checking will succeed. This instruction assumes +// that the user knew to negate $rB. 
+//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBYInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10111011100, OOL, IOL, "rotqmby\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class ROTQMBYVecInst<ValueType vectype>: + ROTQMBYInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern, $rB must be negated */]>; + +class ROTQMBYRegInst<RegisterClass rclass>: + ROTQMBYInst<(outs rclass:$rT), (ins rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass RotateQuadBytes +{ + def v16i8: ROTQMBYVecInst<v16i8>; + def v8i16: ROTQMBYVecInst<v8i16>; + def v4i32: ROTQMBYVecInst<v4i32>; + def v2i64: ROTQMBYVecInst<v2i64>; + + def r128: ROTQMBYRegInst<GPRC>; + def r64: ROTQMBYRegInst<R64C>; +} + +defm ROTQMBY : RotateQuadBytes; + +def : Pat<(SPUsrl_bytes GPRC:$rA, R32C:$rB), + (ROTQMBYr128 GPRC:$rA, + (SFIr32 R32C:$rB, 0))>; + +class ROTQMBYIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b10111111100, OOL, IOL, "rotqmbyi\t$rT, $rA, $val", + RotShiftQuad, pattern>; + +class ROTQMBYIVecInst<ValueType vectype>: + ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), + [/* no pattern */]>; + +class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, + PatLeaf pred>: + ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val), + [/* no pattern */]>; + +// 128-bit zero extension form: +class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>: + ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val), + [/* no pattern */]>; + +multiclass RotateQuadBytesImm +{ + def v16i8: ROTQMBYIVecInst<v16i8>; + def v8i16: ROTQMBYIVecInst<v8i16>; + def v4i32: ROTQMBYIVecInst<v4i32>; + def v2i64: ROTQMBYIVecInst<v2i64>; + + def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>; + def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>; + + def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>; + def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>; + def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>; + def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>; +} + +defm ROTQMBYI : RotateQuadBytesImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate right and mask by bit count +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBYBIInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10110011100, OOL, IOL, "rotqmbybi\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class ROTQMBYBIVecInst<ValueType vectype>: + ROTQMBYBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern, */]>; + +multiclass RotateMaskQuadByBitCount +{ + def v16i8: ROTQMBYBIVecInst<v16i8>; + def v8i16: ROTQMBYBIVecInst<v8i16>; + def v4i32: ROTQMBYBIVecInst<v4i32>; + def v2i64: ROTQMBYBIVecInst<v2i64>; + def r128: ROTQMBYBIInst<(outs GPRC:$rT), (ins GPRC:$rA, R32C:$rB), + [/*no pattern*/]>; +} + +defm ROTQMBYBI: RotateMaskQuadByBitCount; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad and mask by bits +// Note that the rotate amount has to be negated +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBIInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b10011011100, OOL, IOL, "rotqmbi\t$rT, $rA, $rB", + RotShiftQuad, pattern>; + +class ROTQMBIVecInst<ValueType vectype>: + ROTQMBIInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + [/* no pattern */]>; + +class ROTQMBIRegInst<RegisterClass rclass>: + ROTQMBIInst<(outs rclass:$rT), (ins 
rclass:$rA, R32C:$rB), + [/* no pattern */]>; + +multiclass RotateMaskQuadByBits +{ + def v16i8: ROTQMBIVecInst<v16i8>; + def v8i16: ROTQMBIVecInst<v8i16>; + def v4i32: ROTQMBIVecInst<v4i32>; + def v2i64: ROTQMBIVecInst<v2i64>; + + def r128: ROTQMBIRegInst<GPRC>; + def r64: ROTQMBIRegInst<R64C>; +} + +defm ROTQMBI: RotateMaskQuadByBits; + +def : Pat<(srl GPRC:$rA, R32C:$rB), + (ROTQMBYBIr128 (ROTQMBIr128 GPRC:$rA, + (SFIr32 R32C:$rB, 0)), + (SFIr32 R32C:$rB, 0))>; + + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Rotate quad and mask by bits, immediate +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class ROTQMBIIInst<dag OOL, dag IOL, list<dag> pattern>: + RI7Form<0b10011111100, OOL, IOL, "rotqmbii\t$rT, $rA, $val", + RotShiftQuad, pattern>; + +class ROTQMBIIVecInst<ValueType vectype>: + ROTQMBIIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val), + [/* no pattern */]>; + +class ROTQMBIIRegInst<RegisterClass rclass>: + ROTQMBIIInst<(outs rclass:$rT), (ins rclass:$rA, rotNeg7imm:$val), + [/* no pattern */]>; + +multiclass RotateMaskQuadByBitsImm +{ + def v16i8: ROTQMBIIVecInst<v16i8>; + def v8i16: ROTQMBIIVecInst<v8i16>; + def v4i32: ROTQMBIIVecInst<v4i32>; + def v2i64: ROTQMBIIVecInst<v2i64>; + + def r128: ROTQMBIIRegInst<GPRC>; + def r64: ROTQMBIIRegInst<R64C>; +} + +defm ROTQMBII: RotateMaskQuadByBitsImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def ROTMAHv8i16: + RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "rotmah\t$rT, $rA, $rB", RotShiftVec, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; + +def ROTMAHr16: + RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), + "rotmah\t$rT, $rA, $rB", RotShiftVec, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(sra R16C:$rA, R32C:$rB), + (ROTMAHr16 R16C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(sra R16C:$rA, R16C:$rB), + (ROTMAHr16 R16C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(sra R16C:$rA, R8C:$rB), + (ROTMAHr16 R16C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +def ROTMAHIv8i16: + RRForm<0b01111110000, (outs VECREG:$rT), (ins VECREG:$rA, rothNeg7imm:$val), + "rotmahi\t$rT, $rA, $val", RotShiftVec, + [(set (v8i16 VECREG:$rT), + (SPUvec_sra (v8i16 VECREG:$rA), (i32 uimm7:$val)))]>; + +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i16 uimm7:$val)), + (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>; + +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (i8 uimm7:$val)), + (ROTMAHIv8i16 (v8i16 VECREG:$rA), (TO_IMM32 uimm7:$val))>; + +def ROTMAHIr16: + RRForm<0b01111110000, (outs R16C:$rT), (ins R16C:$rA, rothNeg7imm_i16:$val), + "rotmahi\t$rT, $rA, $val", RotShiftVec, + [(set R16C:$rT, (sra R16C:$rA, (i16 uimm7:$val)))]>; + +def : Pat<(sra R16C:$rA, (i32 imm:$val)), + (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; + +def : Pat<(sra R16C:$rA, (i8 imm:$val)), + (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; + +def ROTMAv4i32: + RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "rotma\t$rT, $rA, $rB", RotShiftVec, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>; + +def ROTMAr32: + RRForm<0b01011010000, (outs R32C:$rT), (ins 
R32C:$rA, R32C:$rB), + "rotma\t$rT, $rA, $rB", RotShiftVec, + [/* see patterns below - $rB must be negated */]>; + +def : Pat<(sra R32C:$rA, R32C:$rB), + (ROTMAr32 R32C:$rA, (SFIr32 R32C:$rB, 0))>; + +def : Pat<(sra R32C:$rA, R16C:$rB), + (ROTMAr32 R32C:$rA, + (SFIr32 (XSHWr16 R16C:$rB), 0))>; + +def : Pat<(sra R32C:$rA, R8C:$rB), + (ROTMAr32 R32C:$rA, + (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; + +class ROTMAIInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01011110000, OOL, IOL, + "rotmai\t$rT, $rA, $val", + RotShiftVec, pattern>; + +class ROTMAIVecInst<ValueType vectype, Operand intop, ValueType inttype>: + ROTMAIInst<(outs VECREG:$rT), (ins VECREG:$rA, intop:$val), + [(set (vectype VECREG:$rT), + (SPUvec_sra VECREG:$rA, (inttype uimm7:$val)))]>; + +class ROTMAIRegInst<RegisterClass rclass, Operand intop, ValueType inttype>: + ROTMAIInst<(outs rclass:$rT), (ins rclass:$rA, intop:$val), + [(set rclass:$rT, (sra rclass:$rA, (inttype uimm7:$val)))]>; + +multiclass RotateMaskAlgebraicImm { + def v2i64_i32 : ROTMAIVecInst<v2i64, rotNeg7imm, i32>; + def v4i32_i32 : ROTMAIVecInst<v4i32, rotNeg7imm, i32>; + def r64_i32 : ROTMAIRegInst<R64C, rotNeg7imm, i32>; + def r32_i32 : ROTMAIRegInst<R32C, rotNeg7imm, i32>; +} + +defm ROTMAI : RotateMaskAlgebraicImm; + +//===----------------------------------------------------------------------===// +// Branch and conditionals: +//===----------------------------------------------------------------------===// + +let isTerminator = 1, isBarrier = 1 in { + // Halt If Equal (r32 preferred slot only, no vector form) + def HEQr32: + RRForm_3<0b00011011110, (outs), (ins R32C:$rA, R32C:$rB), + "heq\t$rA, $rB", BranchResolv, + [/* no pattern to match */]>; + + def HEQIr32 : + RI10Form_2<0b11111110, (outs), (ins R32C:$rA, s10imm:$val), + "heqi\t$rA, $val", BranchResolv, + [/* no pattern to match */]>; + + // HGT/HGTI: These instructions use signed arithmetic for the comparison, + // contrasting with HLGT/HLGTI, which use unsigned comparison: + def HGTr32: + RRForm_3<0b00011010010, (outs), (ins R32C:$rA, R32C:$rB), + "hgt\t$rA, $rB", BranchResolv, + [/* no pattern to match */]>; + + def HGTIr32: + RI10Form_2<0b11110010, (outs), (ins R32C:$rA, s10imm:$val), + "hgti\t$rA, $val", BranchResolv, + [/* no pattern to match */]>; + + def HLGTr32: + RRForm_3<0b00011011010, (outs), (ins R32C:$rA, R32C:$rB), + "hlgt\t$rA, $rB", BranchResolv, + [/* no pattern to match */]>; + + def HLGTIr32: + RI10Form_2<0b11111010, (outs), (ins R32C:$rA, s10imm:$val), + "hlgti\t$rA, $val", BranchResolv, + [/* no pattern to match */]>; +} + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// Comparison operators for i8, i16 and i32: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class CEQBInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00001011110, OOL, IOL, "ceqb\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpEqualByte +{ + def v16i8 : + CEQBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v16i8 VECREG:$rT), (seteq (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r8 : + CEQBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + [(set R8C:$rT, (seteq R8C:$rA, R8C:$rB))]>; +} + +class CEQBIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b01111110, OOL, IOL, "ceqbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpEqualByteImm +{ + def v16i8 : + CEQBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), + [(set (v16i8 VECREG:$rT), (seteq (v16i8 VECREG:$rA), + v16i8SExt8Imm:$val))]>; + def r8: 
+ CEQBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), + [(set R8C:$rT, (seteq R8C:$rA, immSExt8:$val))]>; +} + +class CEQHInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00010011110, OOL, IOL, "ceqh\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpEqualHalfword +{ + def v8i16 : CEQHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v8i16 VECREG:$rT), (seteq (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r16 : CEQHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (seteq R16C:$rA, R16C:$rB))]>; +} + +class CEQHIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b10111110, OOL, IOL, "ceqhi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpEqualHalfwordImm +{ + def v8i16 : CEQHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (seteq (v8i16 VECREG:$rA), + (v8i16 v8i16SExt10Imm:$val)))]>; + def r16 : CEQHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (seteq R16C:$rA, i16ImmSExt10:$val))]>; +} + +class CEQInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00000011110, OOL, IOL, "ceq\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpEqualWord +{ + def v4i32 : CEQInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (seteq (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + + def r32 : CEQInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (seteq R32C:$rA, R32C:$rB))]>; +} + +class CEQIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b00111110, OOL, IOL, "ceqi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpEqualWordImm +{ + def v4i32 : CEQIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (seteq (v4i32 VECREG:$rA), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def r32: CEQIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (seteq R32C:$rA, i32ImmSExt10:$val))]>; +} + +class CGTBInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00001010010, OOL, IOL, "cgtb\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpGtrByte +{ + def v16i8 : + CGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v16i8 VECREG:$rT), (setgt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r8 : + CGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + [(set R8C:$rT, (setgt R8C:$rA, R8C:$rB))]>; +} + +class CGTBIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b01110010, OOL, IOL, "cgtbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpGtrByteImm +{ + def v16i8 : + CGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), + [(set (v16i8 VECREG:$rT), (setgt (v16i8 VECREG:$rA), + v16i8SExt8Imm:$val))]>; + def r8: + CGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), + [(set R8C:$rT, (setgt R8C:$rA, immSExt8:$val))]>; +} + +class CGTHInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00010010010, OOL, IOL, "cgth\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpGtrHalfword +{ + def v8i16 : CGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v8i16 VECREG:$rT), (setgt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r16 : CGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (setgt R16C:$rA, R16C:$rB))]>; +} + +class CGTHIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b10110010, OOL, IOL, "cgthi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpGtrHalfwordImm +{ + def v8i16 : CGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (setgt (v8i16 VECREG:$rA), + (v8i16 
v8i16SExt10Imm:$val)))]>; + def r16 : CGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (setgt R16C:$rA, i16ImmSExt10:$val))]>; +} + +class CGTInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00000010010, OOL, IOL, "cgt\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpGtrWord +{ + def v4i32 : CGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (setgt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + + def r32 : CGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (setgt R32C:$rA, R32C:$rB))]>; +} + +class CGTIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b00110010, OOL, IOL, "cgti\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpGtrWordImm +{ + def v4i32 : CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (setgt (v4i32 VECREG:$rA), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def r32: CGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (setgt R32C:$rA, i32ImmSExt10:$val))]>; + + // CGTIv4f32, CGTIf32: These are used in the f32 fdiv instruction sequence: + def v4f32: CGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (setgt (v4i32 (bitconvert (v4f32 VECREG:$rA))), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def f32: CGTIInst<(outs R32C:$rT), (ins R32FP:$rA, s10imm_i32:$val), + [/* no pattern */]>; +} + +class CLGTBInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00001011010, OOL, IOL, "clgtb\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpLGtrByte +{ + def v16i8 : + CLGTBInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v16i8 VECREG:$rT), (setugt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r8 : + CLGTBInst<(outs R8C:$rT), (ins R8C:$rA, R8C:$rB), + [(set R8C:$rT, (setugt R8C:$rA, R8C:$rB))]>; +} + +class CLGTBIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b01111010, OOL, IOL, "clgtbi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpLGtrByteImm +{ + def v16i8 : + CLGTBIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm_i8:$val), + [(set (v16i8 VECREG:$rT), (setugt (v16i8 VECREG:$rA), + v16i8SExt8Imm:$val))]>; + def r8: + CLGTBIInst<(outs R8C:$rT), (ins R8C:$rA, s10imm_i8:$val), + [(set R8C:$rT, (setugt R8C:$rA, immSExt8:$val))]>; +} + +class CLGTHInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00010011010, OOL, IOL, "clgth\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpLGtrHalfword +{ + def v8i16 : CLGTHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v8i16 VECREG:$rT), (setugt (v8i16 VECREG:$rA), + (v8i16 VECREG:$rB)))]>; + + def r16 : CLGTHInst<(outs R16C:$rT), (ins R16C:$rA, R16C:$rB), + [(set R16C:$rT, (setugt R16C:$rA, R16C:$rB))]>; +} + +class CLGTHIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b10111010, OOL, IOL, "clgthi\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpLGtrHalfwordImm +{ + def v8i16 : CLGTHIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v8i16 VECREG:$rT), + (setugt (v8i16 VECREG:$rA), + (v8i16 v8i16SExt10Imm:$val)))]>; + def r16 : CLGTHIInst<(outs R16C:$rT), (ins R16C:$rA, s10imm:$val), + [(set R16C:$rT, (setugt R16C:$rA, i16ImmSExt10:$val))]>; +} + +class CLGTInst<dag OOL, dag IOL, list<dag> pattern> : + RRForm<0b00000011010, OOL, IOL, "clgt\t$rT, $rA, $rB", + ByteOp, pattern>; + +multiclass CmpLGtrWord +{ + def v4i32 : CLGTInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (v4i32 VECREG:$rT), + (setugt (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; + + def r32 : 
CLGTInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), + [(set R32C:$rT, (setugt R32C:$rA, R32C:$rB))]>; +} + +class CLGTIInst<dag OOL, dag IOL, list<dag> pattern> : + RI10Form<0b00111010, OOL, IOL, "clgti\t$rT, $rA, $val", + ByteOp, pattern>; + +multiclass CmpLGtrWordImm +{ + def v4i32 : CLGTIInst<(outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val), + [(set (v4i32 VECREG:$rT), + (setugt (v4i32 VECREG:$rA), + (v4i32 v4i32SExt16Imm:$val)))]>; + + def r32: CLGTIInst<(outs R32C:$rT), (ins R32C:$rA, s10imm_i32:$val), + [(set R32C:$rT, (setugt R32C:$rA, i32ImmSExt10:$val))]>; +} + +defm CEQB : CmpEqualByte; +defm CEQBI : CmpEqualByteImm; +defm CEQH : CmpEqualHalfword; +defm CEQHI : CmpEqualHalfwordImm; +defm CEQ : CmpEqualWord; +defm CEQI : CmpEqualWordImm; +defm CGTB : CmpGtrByte; +defm CGTBI : CmpGtrByteImm; +defm CGTH : CmpGtrHalfword; +defm CGTHI : CmpGtrHalfwordImm; +defm CGT : CmpGtrWord; +defm CGTI : CmpGtrWordImm; +defm CLGTB : CmpLGtrByte; +defm CLGTBI : CmpLGtrByteImm; +defm CLGTH : CmpLGtrHalfword; +defm CLGTHI : CmpLGtrHalfwordImm; +defm CLGT : CmpLGtrWord; +defm CLGTI : CmpLGtrWordImm; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// For SETCC primitives not supported above (setlt, setle, setge, etc.) +// define a pattern to generate the right code, as a binary operator +// (in a manner of speaking.) +// +// Notes: +// 1. This only matches the setcc set of conditionals. Special pattern +// matching is used for select conditionals. +// +// 2. The "DAG" versions of these classes are almost exclusively used for +// i64 comparisons. See the tblgen fundamentals documentation for what +// ".ResultInstrs[0]" means; see TargetSelectionDAG.td and the Pattern +// class for where ResultInstrs originates. +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class SETCCNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype, + SPUInstr xorinst, SPUInstr cmpare>: + Pat<(cond rclass:$rA, rclass:$rB), + (xorinst (cmpare rclass:$rA, rclass:$rB), (inttype -1))>; + +class SETCCNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype, + PatLeaf immpred, SPUInstr xorinst, SPUInstr cmpare>: + Pat<(cond rclass:$rA, (inttype immpred:$imm)), + (xorinst (cmpare rclass:$rA, (inttype immpred:$imm)), (inttype -1))>; + +def : SETCCNegCondReg<setne, R8C, i8, XORBIr8, CEQBr8>; +def : SETCCNegCondImm<setne, R8C, i8, immSExt8, XORBIr8, CEQBIr8>; + +def : SETCCNegCondReg<setne, R16C, i16, XORHIr16, CEQHr16>; +def : SETCCNegCondImm<setne, R16C, i16, i16ImmSExt10, XORHIr16, CEQHIr16>; + +def : SETCCNegCondReg<setne, R32C, i32, XORIr32, CEQr32>; +def : SETCCNegCondImm<setne, R32C, i32, i32ImmSExt10, XORIr32, CEQIr32>; + +class SETCCBinOpReg<PatFrag cond, RegisterClass rclass, + SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>: + Pat<(cond rclass:$rA, rclass:$rB), + (binop (cmpOp1 rclass:$rA, rclass:$rB), + (cmpOp2 rclass:$rA, rclass:$rB))>; + +class SETCCBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred, + ValueType immtype, + SPUInstr binop, SPUInstr cmpOp1, SPUInstr cmpOp2>: + Pat<(cond rclass:$rA, (immtype immpred:$imm)), + (binop (cmpOp1 rclass:$rA, (immtype immpred:$imm)), + (cmpOp2 rclass:$rA, (immtype immpred:$imm)))>; + +def : SETCCBinOpReg<setge, R8C, ORr8, CGTBr8, CEQBr8>; +def : SETCCBinOpImm<setge, R8C, immSExt8, i8, ORr8, CGTBIr8, CEQBIr8>; +def : SETCCBinOpReg<setlt, R8C, NORr8, CGTBr8, CEQBr8>; +def : SETCCBinOpImm<setlt, R8C, immSExt8, i8, NORr8, CGTBIr8, CEQBIr8>; +def : Pat<(setle R8C:$rA, R8C:$rB), + (XORBIr8 (CGTBr8 
R8C:$rA, R8C:$rB), 0xff)>; +def : Pat<(setle R8C:$rA, immU8:$imm), + (XORBIr8 (CGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; + +def : SETCCBinOpReg<setge, R16C, ORr16, CGTHr16, CEQHr16>; +def : SETCCBinOpImm<setge, R16C, i16ImmSExt10, i16, + ORr16, CGTHIr16, CEQHIr16>; +def : SETCCBinOpReg<setlt, R16C, NORr16, CGTHr16, CEQHr16>; +def : SETCCBinOpImm<setlt, R16C, i16ImmSExt10, i16, NORr16, CGTHIr16, CEQHIr16>; +def : Pat<(setle R16C:$rA, R16C:$rB), + (XORHIr16 (CGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; +def : Pat<(setle R16C:$rA, i16ImmSExt10:$imm), + (XORHIr16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; + +def : SETCCBinOpReg<setge, R32C, ORr32, CGTr32, CEQr32>; +def : SETCCBinOpImm<setge, R32C, i32ImmSExt10, i32, + ORr32, CGTIr32, CEQIr32>; +def : SETCCBinOpReg<setlt, R32C, NORr32, CGTr32, CEQr32>; +def : SETCCBinOpImm<setlt, R32C, i32ImmSExt10, i32, NORr32, CGTIr32, CEQIr32>; +def : Pat<(setle R32C:$rA, R32C:$rB), + (XORIr32 (CGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; +def : Pat<(setle R32C:$rA, i32ImmSExt10:$imm), + (XORIr32 (CGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; + +def : SETCCBinOpReg<setuge, R8C, ORr8, CLGTBr8, CEQBr8>; +def : SETCCBinOpImm<setuge, R8C, immSExt8, i8, ORr8, CLGTBIr8, CEQBIr8>; +def : SETCCBinOpReg<setult, R8C, NORr8, CLGTBr8, CEQBr8>; +def : SETCCBinOpImm<setult, R8C, immSExt8, i8, NORr8, CLGTBIr8, CEQBIr8>; +def : Pat<(setule R8C:$rA, R8C:$rB), + (XORBIr8 (CLGTBr8 R8C:$rA, R8C:$rB), 0xff)>; +def : Pat<(setule R8C:$rA, immU8:$imm), + (XORBIr8 (CLGTBIr8 R8C:$rA, immU8:$imm), 0xff)>; + +def : SETCCBinOpReg<setuge, R16C, ORr16, CLGTHr16, CEQHr16>; +def : SETCCBinOpImm<setuge, R16C, i16ImmSExt10, i16, + ORr16, CLGTHIr16, CEQHIr16>; +def : SETCCBinOpReg<setult, R16C, NORr16, CLGTHr16, CEQHr16>; +def : SETCCBinOpImm<setult, R16C, i16ImmSExt10, i16, NORr16, + CLGTHIr16, CEQHIr16>; +def : Pat<(setule R16C:$rA, R16C:$rB), + (XORHIr16 (CLGTHr16 R16C:$rA, R16C:$rB), 0xffff)>; +def : Pat<(setule R16C:$rA, i16ImmSExt10:$imm), + (XORHIr16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$imm), 0xffff)>; + +def : SETCCBinOpReg<setuge, R32C, ORr32, CLGTr32, CEQr32>; +def : SETCCBinOpImm<setuge, R32C, i32ImmSExt10, i32, + ORr32, CLGTIr32, CEQIr32>; +def : SETCCBinOpReg<setult, R32C, NORr32, CLGTr32, CEQr32>; +def : SETCCBinOpImm<setult, R32C, i32ImmSExt10, i32, NORr32, CLGTIr32, CEQIr32>; +def : Pat<(setule R32C:$rA, R32C:$rB), + (XORIr32 (CLGTr32 R32C:$rA, R32C:$rB), 0xffffffff)>; +def : Pat<(setule R32C:$rA, i32ImmSExt10:$imm), + (XORIr32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$imm), 0xffffffff)>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// select conditional patterns: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +class SELECTNegCondReg<PatFrag cond, RegisterClass rclass, ValueType inttype, + SPUInstr selinstr, SPUInstr cmpare>: + Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rTrue, rclass:$rFalse, + (cmpare rclass:$rA, rclass:$rB))>; + +class SELECTNegCondImm<PatFrag cond, RegisterClass rclass, ValueType inttype, + PatLeaf immpred, SPUInstr selinstr, SPUInstr cmpare>: + Pat<(select (inttype (cond rclass:$rA, immpred:$imm)), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rTrue, rclass:$rFalse, + (cmpare rclass:$rA, immpred:$imm))>; + +def : SELECTNegCondReg<setne, R8C, i8, SELBr8, CEQBr8>; +def : SELECTNegCondImm<setne, R8C, i8, immSExt8, SELBr8, CEQBIr8>; +def : SELECTNegCondReg<setle, R8C, i8, SELBr8, CGTBr8>; +def : SELECTNegCondImm<setle, R8C, i8, immSExt8, 
SELBr8, CGTBr8>; +def : SELECTNegCondReg<setule, R8C, i8, SELBr8, CLGTBr8>; +def : SELECTNegCondImm<setule, R8C, i8, immU8, SELBr8, CLGTBIr8>; + +def : SELECTNegCondReg<setne, R16C, i16, SELBr16, CEQHr16>; +def : SELECTNegCondImm<setne, R16C, i16, i16ImmSExt10, SELBr16, CEQHIr16>; +def : SELECTNegCondReg<setle, R16C, i16, SELBr16, CGTHr16>; +def : SELECTNegCondImm<setle, R16C, i16, i16ImmSExt10, SELBr16, CGTHIr16>; +def : SELECTNegCondReg<setule, R16C, i16, SELBr16, CLGTHr16>; +def : SELECTNegCondImm<setule, R16C, i16, i16ImmSExt10, SELBr16, CLGTHIr16>; + +def : SELECTNegCondReg<setne, R32C, i32, SELBr32, CEQr32>; +def : SELECTNegCondImm<setne, R32C, i32, i32ImmSExt10, SELBr32, CEQIr32>; +def : SELECTNegCondReg<setle, R32C, i32, SELBr32, CGTr32>; +def : SELECTNegCondImm<setle, R32C, i32, i32ImmSExt10, SELBr32, CGTIr32>; +def : SELECTNegCondReg<setule, R32C, i32, SELBr32, CLGTr32>; +def : SELECTNegCondImm<setule, R32C, i32, i32ImmSExt10, SELBr32, CLGTIr32>; + +class SELECTBinOpReg<PatFrag cond, RegisterClass rclass, ValueType inttype, + SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1, + SPUInstr cmpOp2>: + Pat<(select (inttype (cond rclass:$rA, rclass:$rB)), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rFalse, rclass:$rTrue, + (binop (cmpOp1 rclass:$rA, rclass:$rB), + (cmpOp2 rclass:$rA, rclass:$rB)))>; + +class SELECTBinOpImm<PatFrag cond, RegisterClass rclass, PatLeaf immpred, + ValueType inttype, + SPUInstr selinstr, SPUInstr binop, SPUInstr cmpOp1, + SPUInstr cmpOp2>: + Pat<(select (inttype (cond rclass:$rA, (inttype immpred:$imm))), + rclass:$rTrue, rclass:$rFalse), + (selinstr rclass:$rFalse, rclass:$rTrue, + (binop (cmpOp1 rclass:$rA, (inttype immpred:$imm)), + (cmpOp2 rclass:$rA, (inttype immpred:$imm))))>; + +def : SELECTBinOpReg<setge, R8C, i8, SELBr8, ORr8, CGTBr8, CEQBr8>; +def : SELECTBinOpImm<setge, R8C, immSExt8, i8, + SELBr8, ORr8, CGTBIr8, CEQBIr8>; + +def : SELECTBinOpReg<setge, R16C, i16, SELBr16, ORr16, CGTHr16, CEQHr16>; +def : SELECTBinOpImm<setge, R16C, i16ImmSExt10, i16, + SELBr16, ORr16, CGTHIr16, CEQHIr16>; + +def : SELECTBinOpReg<setge, R32C, i32, SELBr32, ORr32, CGTr32, CEQr32>; +def : SELECTBinOpImm<setge, R32C, i32ImmSExt10, i32, + SELBr32, ORr32, CGTIr32, CEQIr32>; + +def : SELECTBinOpReg<setuge, R8C, i8, SELBr8, ORr8, CLGTBr8, CEQBr8>; +def : SELECTBinOpImm<setuge, R8C, immSExt8, i8, + SELBr8, ORr8, CLGTBIr8, CEQBIr8>; + +def : SELECTBinOpReg<setuge, R16C, i16, SELBr16, ORr16, CLGTHr16, CEQHr16>; +def : SELECTBinOpImm<setuge, R16C, i16ImmUns10, i16, + SELBr16, ORr16, CLGTHIr16, CEQHIr16>; + +def : SELECTBinOpReg<setuge, R32C, i32, SELBr32, ORr32, CLGTr32, CEQr32>; +def : SELECTBinOpImm<setuge, R32C, i32ImmUns10, i32, + SELBr32, ORr32, CLGTIr32, CEQIr32>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +let isCall = 1, + // All calls clobber the non-callee-saved registers: + Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, + R10,R11,R12,R13,R14,R15,R16,R17,R18,R19, + R20,R21,R22,R23,R24,R25,R26,R27,R28,R29, + R30,R31,R32,R33,R34,R35,R36,R37,R38,R39, + R40,R41,R42,R43,R44,R45,R46,R47,R48,R49, + R50,R51,R52,R53,R54,R55,R56,R57,R58,R59, + R60,R61,R62,R63,R64,R65,R66,R67,R68,R69, + R70,R71,R72,R73,R74,R75,R76,R77,R78,R79], + // All of these instructions use $lr (aka $0) + Uses = [R0] in { + // Branch relative and set link: Used if we actually know that the target + // is within [-32768, 32767] bytes of the target + def BRSL: + BranchSetLink<0b011001100, (outs), (ins relcalltarget:$func, variable_ops), + "brsl\t$$lr, $func", 
+ [(SPUcall (SPUpcrel tglobaladdr:$func, 0))]>; + + // Branch absolute and set link: Used if we actually know that the target + // is an absolute address + def BRASL: + BranchSetLink<0b011001100, (outs), (ins calltarget:$func, variable_ops), + "brasl\t$$lr, $func", + [(SPUcall (SPUaform tglobaladdr:$func, 0))]>; + + // Branch indirect and set link if external data. These instructions are not + // actually generated, matched by an intrinsic: + def BISLED_00: BISLEDForm<0b11, "bisled\t$$lr, $func", [/* empty pattern */]>; + def BISLED_E0: BISLEDForm<0b10, "bisled\t$$lr, $func", [/* empty pattern */]>; + def BISLED_0D: BISLEDForm<0b01, "bisled\t$$lr, $func", [/* empty pattern */]>; + def BISLED_ED: BISLEDForm<0b00, "bisled\t$$lr, $func", [/* empty pattern */]>; + + // Branch indirect and set link. This is the "X-form" address version of a + // function call + def BISL: + BIForm<0b10010101100, "bisl\t$$lr, $func", [(SPUcall R32C:$func)]>; +} + +// Support calls to external symbols: +def : Pat<(SPUcall (SPUpcrel texternalsym:$func, 0)), + (BRSL texternalsym:$func)>; + +def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)), + (BRASL texternalsym:$func)>; + +// Unconditional branches: +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + let isBarrier = 1 in { + def BR : + UncondBranch<0b001001100, (outs), (ins brtarget:$dest), + "br\t$dest", + [(br bb:$dest)]>; + + // Unconditional, absolute address branch + def BRA: + UncondBranch<0b001100000, (outs), (ins brtarget:$dest), + "bra\t$dest", + [/* no pattern */]>; + + // Indirect branch + let isIndirectBranch = 1 in { + def BI: + BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + } + } + + // Conditional branches: + class BRNZInst<dag IOL, list<dag> pattern>: + RI16Form<0b010000100, (outs), IOL, "brnz\t$rCond,$dest", + BranchResolv, pattern>; + + class BRNZRegInst<RegisterClass rclass>: + BRNZInst<(ins rclass:$rCond, brtarget:$dest), + [(brcond rclass:$rCond, bb:$dest)]>; + + class BRNZVecInst<ValueType vectype>: + BRNZInst<(ins VECREG:$rCond, brtarget:$dest), + [(brcond (vectype VECREG:$rCond), bb:$dest)]>; + + multiclass BranchNotZero { + def v4i32 : BRNZVecInst<v4i32>; + def r32 : BRNZRegInst<R32C>; + } + + defm BRNZ : BranchNotZero; + + class BRZInst<dag IOL, list<dag> pattern>: + RI16Form<0b000000100, (outs), IOL, "brz\t$rT,$dest", + BranchResolv, pattern>; + + class BRZRegInst<RegisterClass rclass>: + BRZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; + + class BRZVecInst<ValueType vectype>: + BRZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchZero { + def v4i32: BRZVecInst<v4i32>; + def r32: BRZRegInst<R32C>; + } + + defm BRZ: BranchZero; + + // Note: LLVM doesn't do branch conditional, indirect. 
Otherwise these would + // be useful: + /* + class BINZInst<dag IOL, list<dag> pattern>: + BICondForm<0b10010100100, (outs), IOL, "binz\t$rA, $dest", pattern>; + + class BINZRegInst<RegisterClass rclass>: + BINZInst<(ins rclass:$rA, brtarget:$dest), + [(brcond rclass:$rA, R32C:$dest)]>; + + class BINZVecInst<ValueType vectype>: + BINZInst<(ins VECREG:$rA, R32C:$dest), + [(brcond (vectype VECREG:$rA), R32C:$dest)]>; + + multiclass BranchNotZeroIndirect { + def v4i32: BINZVecInst<v4i32>; + def r32: BINZRegInst<R32C>; + } + + defm BINZ: BranchNotZeroIndirect; + + class BIZInst<dag IOL, list<dag> pattern>: + BICondForm<0b00010100100, (outs), IOL, "biz\t$rA, $func", pattern>; + + class BIZRegInst<RegisterClass rclass>: + BIZInst<(ins rclass:$rA, R32C:$func), [/* no pattern */]>; + + class BIZVecInst<ValueType vectype>: + BIZInst<(ins VECREG:$rA, R32C:$func), [/* no pattern */]>; + + multiclass BranchZeroIndirect { + def v4i32: BIZVecInst<v4i32>; + def r32: BIZRegInst<R32C>; + } + + defm BIZ: BranchZeroIndirect; + */ + + class BRHNZInst<dag IOL, list<dag> pattern>: + RI16Form<0b011000100, (outs), IOL, "brhnz\t$rCond,$dest", BranchResolv, + pattern>; + + class BRHNZRegInst<RegisterClass rclass>: + BRHNZInst<(ins rclass:$rCond, brtarget:$dest), + [(brcond rclass:$rCond, bb:$dest)]>; + + class BRHNZVecInst<ValueType vectype>: + BRHNZInst<(ins VECREG:$rCond, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchNotZeroHalfword { + def v8i16: BRHNZVecInst<v8i16>; + def r16: BRHNZRegInst<R16C>; + } + + defm BRHNZ: BranchNotZeroHalfword; + + class BRHZInst<dag IOL, list<dag> pattern>: + RI16Form<0b001000100, (outs), IOL, "brhz\t$rT,$dest", BranchResolv, + pattern>; + + class BRHZRegInst<RegisterClass rclass>: + BRHZInst<(ins rclass:$rT, brtarget:$dest), [/* no pattern */]>; + + class BRHZVecInst<ValueType vectype>: + BRHZInst<(ins VECREG:$rT, brtarget:$dest), [/* no pattern */]>; + + multiclass BranchZeroHalfword { + def v8i16: BRHZVecInst<v8i16>; + def r16: BRHZRegInst<R16C>; + } + + defm BRHZ: BranchZeroHalfword; +} + +//===----------------------------------------------------------------------===// +// setcc and brcond patterns: +//===----------------------------------------------------------------------===// + +def : Pat<(brcond (i16 (seteq R16C:$rA, 0)), bb:$dest), + (BRHZr16 R16C:$rA, bb:$dest)>; +def : Pat<(brcond (i16 (setne R16C:$rA, 0)), bb:$dest), + (BRHNZr16 R16C:$rA, bb:$dest)>; + +def : Pat<(brcond (i32 (seteq R32C:$rA, 0)), bb:$dest), + (BRZr32 R32C:$rA, bb:$dest)>; +def : Pat<(brcond (i32 (setne R32C:$rA, 0)), bb:$dest), + (BRNZr32 R32C:$rA, bb:$dest)>; + +multiclass BranchCondEQ<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32> +{ + def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (CEQHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; + + def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (CEQHr16 R16C:$rA, R16:$rB), bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (CEQIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (CEQr32 R32C:$rA, R32C:$rB), bb:$dest)>; +} + +defm BRCONDeq : BranchCondEQ<seteq, BRHNZr16, BRNZr32>; +defm BRCONDne : BranchCondEQ<setne, BRHZr16, BRZr32>; + +multiclass BranchCondLGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32> +{ + def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (CLGTHIr16 R16C:$rA, 
i16ImmSExt10:$val), bb:$dest)>; + + def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (CLGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (CLGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; +} + +defm BRCONDugt : BranchCondLGT<setugt, BRHNZr16, BRNZr32>; +defm BRCONDule : BranchCondLGT<setule, BRHZr16, BRZr32>; + +multiclass BranchCondLGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16, + SPUInstr orinst32, SPUInstr brinst32> +{ + def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (orinst16 (CLGTHIr16 R16C:$rA, i16ImmSExt10:$val), + (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), + bb:$dest)>; + + def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (orinst16 (CLGTHr16 R16C:$rA, R16:$rB), + (CEQHr16 R16C:$rA, R16:$rB)), + bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (orinst32 (CLGTIr32 R32C:$rA, i32ImmSExt10:$val), + (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), + bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (orinst32 (CLGTr32 R32C:$rA, R32C:$rB), + (CEQr32 R32C:$rA, R32C:$rB)), + bb:$dest)>; +} + +defm BRCONDuge : BranchCondLGTEQ<setuge, ORr16, BRHNZr16, ORr32, BRNZr32>; +defm BRCONDult : BranchCondLGTEQ<setult, ORr16, BRHZr16, ORr32, BRZr32>; + +multiclass BranchCondGT<PatFrag cond, SPUInstr brinst16, SPUInstr brinst32> +{ + def r16imm : Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), bb:$dest)>; + + def r16 : Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (CGTHr16 R16C:$rA, R16:$rB), bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (CGTr32 R32C:$rA, R32C:$rB), bb:$dest)>; +} + +defm BRCONDgt : BranchCondGT<setgt, BRHNZr16, BRNZr32>; +defm BRCONDle : BranchCondGT<setle, BRHZr16, BRZr32>; + +multiclass BranchCondGTEQ<PatFrag cond, SPUInstr orinst16, SPUInstr brinst16, + SPUInstr orinst32, SPUInstr brinst32> +{ + def r16imm: Pat<(brcond (i16 (cond R16C:$rA, i16ImmSExt10:$val)), bb:$dest), + (brinst16 (orinst16 (CGTHIr16 R16C:$rA, i16ImmSExt10:$val), + (CEQHIr16 R16C:$rA, i16ImmSExt10:$val)), + bb:$dest)>; + + def r16: Pat<(brcond (i16 (cond R16C:$rA, R16C:$rB)), bb:$dest), + (brinst16 (orinst16 (CGTHr16 R16C:$rA, R16:$rB), + (CEQHr16 R16C:$rA, R16:$rB)), + bb:$dest)>; + + def r32imm : Pat<(brcond (i32 (cond R32C:$rA, i32ImmSExt10:$val)), bb:$dest), + (brinst32 (orinst32 (CGTIr32 R32C:$rA, i32ImmSExt10:$val), + (CEQIr32 R32C:$rA, i32ImmSExt10:$val)), + bb:$dest)>; + + def r32 : Pat<(brcond (i32 (cond R32C:$rA, R32C:$rB)), bb:$dest), + (brinst32 (orinst32 (CGTr32 R32C:$rA, R32C:$rB), + (CEQr32 R32C:$rA, R32C:$rB)), + bb:$dest)>; +} + +defm BRCONDge : BranchCondGTEQ<setge, ORr16, BRHNZr16, ORr32, BRNZr32>; +defm BRCONDlt : BranchCondGTEQ<setlt, ORr16, BRHZr16, ORr32, BRZr32>; + +let isTerminator = 1, isBarrier = 1 in { + let isReturn = 1 in { + def RET: + RETForm<"bi\t$$lr", [(retflag)]>; + } +} + +//===----------------------------------------------------------------------===// +// Single precision floating point instructions 
+//===----------------------------------------------------------------------===// + +class FAInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01011000100, OOL, IOL, "fa\t$rT, $rA, $rB", + SPrecFP, pattern>; + +class FAVecInst<ValueType vectype>: + FAInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (fadd (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + +multiclass SFPAdd +{ + def v4f32: FAVecInst<v4f32>; + def f32: FAInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fadd R32FP:$rA, R32FP:$rB))]>; +} + +defm FA : SFPAdd; + +class FSInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01011000100, OOL, IOL, "fs\t$rT, $rA, $rB", + SPrecFP, pattern>; + +class FSVecInst<ValueType vectype>: + FSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (vectype VECREG:$rT), + (fsub (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; + +multiclass SFPSub +{ + def v4f32: FSVecInst<v4f32>; + def f32: FSInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fsub R32FP:$rA, R32FP:$rB))]>; +} + +defm FS : SFPSub; + +class FMInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01100011010, OOL, IOL, + "fm\t$rT, $rA, $rB", SPrecFP, + pattern>; + +class FMVecInst<ValueType type>: + FMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + [(set (type VECREG:$rT), + (fmul (type VECREG:$rA), (type VECREG:$rB)))]>; + +multiclass SFPMul +{ + def v4f32: FMVecInst<v4f32>; + def f32: FMInst<(outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + [(set R32FP:$rT, (fmul R32FP:$rA, R32FP:$rB))]>; +} + +defm FM : SFPMul; + +// Floating point multiply and add +// e.g. d = c + (a * b) +def FMAv4f32: + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fadd (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB))))]>; + +def FMAf32: + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fma\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fadd R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +// FP multiply and subtract +// Subtracts value in rC from product +// res = a * b - c +def FMSv4f32 : + RRRForm<0b0111, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (fmul (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), + (v4f32 VECREG:$rC)))]>; + +def FMSf32 : + RRRForm<0b0111, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, + (fsub (fmul R32FP:$rA, R32FP:$rB), R32FP:$rC))]>; + +// Floating Negative Multiply and Subtract +// Subtracts product from value in rC +// res = fneg(fms a b c) +// = - (a * b - c) +// = c - a * b +// NOTE: subtraction order +// fsub a b = a - b +// fs a b = b - a? 
+def FNMSf32 : + RRRForm<0b1101, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB, R32FP:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set R32FP:$rT, (fsub R32FP:$rC, (fmul R32FP:$rA, R32FP:$rB)))]>; + +def FNMSv4f32 : + RRRForm<0b1101, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "fnms\t$rT, $rA, $rB, $rC", SPrecFP, + [(set (v4f32 VECREG:$rT), + (fsub (v4f32 VECREG:$rC), + (fmul (v4f32 VECREG:$rA), + (v4f32 VECREG:$rB))))]>; + + + + +// Floating point reciprocal estimate + +class FRESTInst<dag OOL, dag IOL>: + RRForm_1<0b00110111000, OOL, IOL, + "frest\t$rT, $rA", SPrecFP, + [/* no pattern */]>; + +def FRESTv4f32 : + FRESTInst<(outs VECREG:$rT), (ins VECREG:$rA)>; + +def FRESTf32 : + FRESTInst<(outs R32FP:$rT), (ins R32FP:$rA)>; + +// Floating point interpolate (used in conjunction with reciprocal estimate) +def FIv4f32 : + RRForm<0b00101011110, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "fi\t$rT, $rA, $rB", SPrecFP, + [/* no pattern */]>; + +def FIf32 : + RRForm<0b00101011110, (outs R32FP:$rT), (ins R32FP:$rA, R32FP:$rB), + "fi\t$rT, $rA, $rB", SPrecFP, + [/* no pattern */]>; + +//-------------------------------------------------------------------------- +// Basic single precision floating point comparisons: +// +// Note: There is no support on SPU for single precision NaN. Consequently, +// ordered and unordered comparisons are the same. +//-------------------------------------------------------------------------- + +def FCEQf32 : + RRForm<0b01000011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fceq\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setueq R32FP:$rA, R32FP:$rB))]>; + +def : Pat<(setoeq R32FP:$rA, R32FP:$rB), + (FCEQf32 R32FP:$rA, R32FP:$rB)>; + +def FCMEQf32 : + RRForm<0b01010011110, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fcmeq\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setueq (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + +def : Pat<(setoeq (fabs R32FP:$rA), (fabs R32FP:$rB)), + (FCMEQf32 R32FP:$rA, R32FP:$rB)>; + +def FCGTf32 : + RRForm<0b01000011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fcgt\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setugt R32FP:$rA, R32FP:$rB))]>; + +def : Pat<(setogt R32FP:$rA, R32FP:$rB), + (FCGTf32 R32FP:$rA, R32FP:$rB)>; + +def FCMGTf32 : + RRForm<0b01010011010, (outs R32C:$rT), (ins R32FP:$rA, R32FP:$rB), + "fcmgt\t$rT, $rA, $rB", SPrecFP, + [(set R32C:$rT, (setugt (fabs R32FP:$rA), (fabs R32FP:$rB)))]>; + +def : Pat<(setogt (fabs R32FP:$rA), (fabs R32FP:$rB)), + (FCMGTf32 R32FP:$rA, R32FP:$rB)>; + +//-------------------------------------------------------------------------- +// Single precision floating point comparisons and SETCC equivalents: +//-------------------------------------------------------------------------- + +def : SETCCNegCondReg<setune, R32FP, i32, XORIr32, FCEQf32>; +def : SETCCNegCondReg<setone, R32FP, i32, XORIr32, FCEQf32>; + +def : SETCCBinOpReg<setuge, R32FP, ORr32, FCGTf32, FCEQf32>; +def : SETCCBinOpReg<setoge, R32FP, ORr32, FCGTf32, FCEQf32>; + +def : SETCCBinOpReg<setult, R32FP, NORr32, FCGTf32, FCEQf32>; +def : SETCCBinOpReg<setolt, R32FP, NORr32, FCGTf32, FCEQf32>; + +def : Pat<(setule R32FP:$rA, R32FP:$rB), + (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; +def : Pat<(setole R32FP:$rA, R32FP:$rB), + (XORIr32 (FCGTf32 R32FP:$rA, R32FP:$rB), 0xffffffff)>; + +// FP Status and Control Register Write +// Why isn't rT a don't care in the ISA? +// Should we create a special RRForm_3 for this guy and zero out the rT? 
+def FSCRWf32 : + RRForm_1<0b01011101110, (outs R32FP:$rT), (ins R32FP:$rA), + "fscrwr\t$rA", SPrecFP, + [/* This instruction requires an intrinsic. Note: rT is unused. */]>; + +// FP Status and Control Register Read +def FSCRRf32 : + RRForm_2<0b01011101110, (outs R32FP:$rT), (ins), + "fscrrd\t$rT", SPrecFP, + [/* This instruction requires an intrinsic */]>; + +// llvm instruction space +// How do these map onto cell instructions? +// fdiv rA rB +// frest rC rB # c = 1/b (both lines) +// fi rC rB rC +// fm rD rA rC # d = a * 1/b +// fnms rB rD rB rA # b = - (d * b - a) --should == 0 in a perfect world +// fma rB rB rC rD # b = b * c + d +// = -(d *b -a) * c + d +// = a * c - c ( a *b *c - a) + +// fcopysign (???) + +// Library calls: +// These llvm instructions will actually map to library calls. +// All that's needed, then, is to check that the appropriate library is +// imported and do a brsl to the proper function name. +// frem # fmod(x, y): x - (x/y) * y +// (Note: fmod(double, double), fmodf(float,float)) +// fsqrt? +// fsin? +// fcos? +// Unimplemented SPU instruction space +// floating reciprocal absolute square root estimate (frsqest) + +// The following are probably just intrinsics +// status and control register write +// status and control register read + +//-------------------------------------- +// Floating Point Conversions +// Signed conversions: +def CSiFv4f32: + CVTIntFPForm<0b0101101110, (outs VECREG:$rT), (ins VECREG:$rA), + "csflt\t$rT, $rA, 0", SPrecFP, + [(set (v4f32 VECREG:$rT), (sint_to_fp (v4i32 VECREG:$rA)))]>; + +// Convert signed integer to floating point +def CSiFf32 : + CVTIntFPForm<0b0101101110, (outs R32FP:$rT), (ins R32C:$rA), + "csflt\t$rT, $rA, 0", SPrecFP, + [(set R32FP:$rT, (sint_to_fp R32C:$rA))]>; + +// Convert unsigned int to float +def CUiFv4f32 : + CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), + "cuflt\t$rT, $rA, 0", SPrecFP, + [(set (v4f32 VECREG:$rT), (uint_to_fp (v4i32 VECREG:$rA)))]>; + +def CUiFf32 : + CVTIntFPForm<0b1101101110, (outs R32FP:$rT), (ins R32C:$rA), + "cuflt\t$rT, $rA, 0", SPrecFP, + [(set R32FP:$rT, (uint_to_fp R32C:$rA))]>; + +// Convert float to unsigned int +// Assume that scale = 0 + +def CFUiv4f32 : + CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), + "cfltu\t$rT, $rA, 0", SPrecFP, + [(set (v4i32 VECREG:$rT), (fp_to_uint (v4f32 VECREG:$rA)))]>; + +def CFUif32 : + CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA), + "cfltu\t$rT, $rA, 0", SPrecFP, + [(set R32C:$rT, (fp_to_uint R32FP:$rA))]>; + +// Convert float to signed int +// Assume that scale = 0 + +def CFSiv4f32 : + CVTIntFPForm<0b1101101110, (outs VECREG:$rT), (ins VECREG:$rA), + "cflts\t$rT, $rA, 0", SPrecFP, + [(set (v4i32 VECREG:$rT), (fp_to_sint (v4f32 VECREG:$rA)))]>; + +def CFSif32 : + CVTIntFPForm<0b1101101110, (outs R32C:$rT), (ins R32FP:$rA), + "cflts\t$rT, $rA, 0", SPrecFP, + [(set R32C:$rT, (fp_to_sint R32FP:$rA))]>; + +//===----------------------------------------------------------------------==// +// Single<->Double precision conversions +//===----------------------------------------------------------------------==// + +// NOTE: We use "vec" name suffix here to avoid confusion (e.g. input is a +// v4f32, output is v2f64--which goes in the name?) + +// Floating point extend single to double +// NOTE: Not sure if passing in v4f32 to FESDvec is correct since it +// operates on two double-word slots (i.e. 1st and 3rd fp numbers +// are ignored). 
+def FESDvec : + RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA), + "fesd\t$rT, $rA", SPrecFP, + [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>; + +def FESDf32 : + RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA), + "fesd\t$rT, $rA", SPrecFP, + [(set R64FP:$rT, (fextend R32FP:$rA))]>; + +// Floating point round double to single +//def FRDSvec : +// RRForm_1<0b10011101110, (outs VECREG:$rT), (ins VECREG:$rA), +// "frds\t$rT, $rA,", SPrecFP, +// [(set (v4f32 R32FP:$rT), (fround (v2f64 R64FP:$rA)))]>; + +def FRDSf64 : + RRForm_1<0b10011101110, (outs R32FP:$rT), (ins R64FP:$rA), + "frds\t$rT, $rA", SPrecFP, + [(set R32FP:$rT, (fround R64FP:$rA))]>; + +//ToDo include anyextend? + +//===----------------------------------------------------------------------==// +// Double precision floating point instructions +//===----------------------------------------------------------------------==// +def FAf64 : + RRForm<0b00110011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + "dfa\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fadd R64FP:$rA, R64FP:$rB))]>; + +def FAv2f64 : + RRForm<0b00110011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfa\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), (fadd (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; + +def FSf64 : + RRForm<0b10100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + "dfs\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fsub R64FP:$rA, R64FP:$rB))]>; + +def FSv2f64 : + RRForm<0b10100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfs\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fsub (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; + +def FMf64 : + RRForm<0b01100011010, (outs R64FP:$rT), (ins R64FP:$rA, R64FP:$rB), + "dfm\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fmul R64FP:$rA, R64FP:$rB))]>; + +def FMv2f64: + RRForm<0b00100011010, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), + "dfm\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)))]>; + +def FMAf64: + RRForm<0b00111010110, (outs R64FP:$rT), + (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + "dfma\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB)))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FMAv2f64: + RRForm<0b00111010110, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "dfma\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fadd (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB))))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FMSf64 : + RRForm<0b10111010110, (outs R64FP:$rT), + (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + "dfms\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fsub (fmul R64FP:$rA, R64FP:$rB), R64FP:$rC))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FMSv2f64 : + RRForm<0b10111010110, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "dfms\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fsub (fmul (v2f64 VECREG:$rA), (v2f64 VECREG:$rB)), + (v2f64 VECREG:$rC)))]>; + +// DFNMS: - (a * b - c) +// - (a * b) + c => c - (a * b) + +class DFNMSInst<dag OOL, dag IOL, list<dag> pattern>: + RRForm<0b01111010110, OOL, IOL, "dfnms\t$rT, $rA, $rB", + DPrecFP, pattern>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +class DFNMSVecInst<list<dag> pattern>: + DFNMSInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + pattern>; + +class DFNMSRegInst<list<dag> pattern>: + DFNMSInst<(outs R64FP:$rT), (ins R64FP:$rA, 
R64FP:$rB, R64FP:$rC), + pattern>; + +multiclass DFMultiplySubtract +{ + def v2f64 : DFNMSVecInst<[(set (v2f64 VECREG:$rT), + (fsub (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB))))]>; + + def f64 : DFNMSRegInst<[(set R64FP:$rT, + (fsub R64FP:$rC, + (fmul R64FP:$rA, R64FP:$rB)))]>; +} + +defm DFNMS : DFMultiplySubtract; + +// - (a * b + c) +// - (a * b) - c +def FNMAf64 : + RRForm<0b11111010110, (outs R64FP:$rT), + (ins R64FP:$rA, R64FP:$rB, R64FP:$rC), + "dfnma\t$rT, $rA, $rB", DPrecFP, + [(set R64FP:$rT, (fneg (fadd R64FP:$rC, (fmul R64FP:$rA, R64FP:$rB))))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +def FNMAv2f64 : + RRForm<0b11111010110, (outs VECREG:$rT), + (ins VECREG:$rA, VECREG:$rB, VECREG:$rC), + "dfnma\t$rT, $rA, $rB", DPrecFP, + [(set (v2f64 VECREG:$rT), + (fneg (fadd (v2f64 VECREG:$rC), + (fmul (v2f64 VECREG:$rA), + (v2f64 VECREG:$rB)))))]>, + RegConstraint<"$rC = $rT">, + NoEncode<"$rC">; + +//===----------------------------------------------------------------------==// +// Floating point negation and absolute value +//===----------------------------------------------------------------------==// + +def : Pat<(fneg (v4f32 VECREG:$rA)), + (XORfnegvec (v4f32 VECREG:$rA), + (v4f32 (ILHUv4i32 0x8000)))>; + +def : Pat<(fneg R32FP:$rA), + (XORfneg32 R32FP:$rA, (ILHUr32 0x8000))>; + +// Floating point absolute value +// Note: f64 fabs is custom-selected. + +def : Pat<(fabs R32FP:$rA), + (ANDfabs32 R32FP:$rA, (IOHLr32 (ILHUr32 0x7fff), 0xffff))>; + +def : Pat<(fabs (v4f32 VECREG:$rA)), + (ANDfabsvec (v4f32 VECREG:$rA), + (IOHLv4i32 (ILHUv4i32 0x7fff), 0xffff))>; + +//===----------------------------------------------------------------------===// +// Hint for branch instructions: +//===----------------------------------------------------------------------===// +def HBRA : + HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">; + +//===----------------------------------------------------------------------===// +// Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong +// in the odd pipeline) +//===----------------------------------------------------------------------===// + +def ENOP : SPUInstr<(outs), (ins), "nop", ExecNOP> { + let Pattern = []; + + let Inst{0-10} = 0b10000000010; + let Inst{11-17} = 0; + let Inst{18-24} = 0; + let Inst{25-31} = 0; +} + +def LNOP : SPUInstr<(outs), (ins), "lnop", LoadNOP> { + let Pattern = []; + + let Inst{0-10} = 0b10000000000; + let Inst{11-17} = 0; + let Inst{18-24} = 0; + let Inst{25-31} = 0; +} + +//===----------------------------------------------------------------------===// +// Bit conversions (type conversions between vector/packed types) +// NOTE: Promotions are handled using the XS* instructions. 
+//===----------------------------------------------------------------------===// +def : Pat<(v16i8 (bitconvert (v8i16 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VECREG:$src))), (v16i8 VECREG:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 VECREG:$src))), (v16i8 VECREG:$src)>; + +def : Pat<(v8i16 (bitconvert (v16i8 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VECREG:$src))), (v8i16 VECREG:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 VECREG:$src))), (v8i16 VECREG:$src)>; + +def : Pat<(v4i32 (bitconvert (v16i8 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VECREG:$src))), (v4i32 VECREG:$src)>; +def : Pat<(v4i32 (bitconvert (v2f64 VECREG:$src))), (v4i32 VECREG:$src)>; + +def : Pat<(v2i64 (bitconvert (v16i8 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 VECREG:$src))), (v2i64 VECREG:$src)>; +def : Pat<(v2i64 (bitconvert (v2f64 VECREG:$src))), (v2i64 VECREG:$src)>; + +def : Pat<(v4f32 (bitconvert (v16i8 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 VECREG:$src))), (v4f32 VECREG:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 VECREG:$src))), (v4f32 VECREG:$src)>; + +def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>; +def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>; + +def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))), + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; +def : Pat<(i128 (bitconvert (v8i16 VECREG:$src))), + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; +def : Pat<(i128 (bitconvert (v4i32 VECREG:$src))), + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; +def : Pat<(i128 (bitconvert (v2i64 VECREG:$src))), + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; +def : Pat<(i128 (bitconvert (v4f32 VECREG:$src))), + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; +def : Pat<(i128 (bitconvert (v2f64 VECREG:$src))), + (COPY_TO_REGCLASS VECREG:$src, GPRC)>; + +def : Pat<(v16i8 (bitconvert (i128 GPRC:$src))), + (v16i8 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; +def : Pat<(v8i16 (bitconvert (i128 GPRC:$src))), + (v8i16 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; +def : Pat<(v4i32 (bitconvert (i128 GPRC:$src))), + (v4i32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; +def : Pat<(v2i64 (bitconvert (i128 GPRC:$src))), + (v2i64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; +def : Pat<(v4f32 (bitconvert (i128 GPRC:$src))), + (v4f32 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; +def : Pat<(v2f64 (bitconvert (i128 GPRC:$src))), + (v2f64 (COPY_TO_REGCLASS GPRC:$src, VECREG))>; + +def : Pat<(i32 (bitconvert R32FP:$rA)), + 
(COPY_TO_REGCLASS R32FP:$rA, R32C)>; + +def : Pat<(f32 (bitconvert R32C:$rA)), + (COPY_TO_REGCLASS R32C:$rA, R32FP)>; + +def : Pat<(i64 (bitconvert R64FP:$rA)), + (COPY_TO_REGCLASS R64FP:$rA, R64C)>; + +def : Pat<(f64 (bitconvert R64C:$rA)), + (COPY_TO_REGCLASS R64C:$rA, R64FP)>; + + +//===----------------------------------------------------------------------===// +// Instruction patterns: +//===----------------------------------------------------------------------===// + +// General 32-bit constants: +def : Pat<(i32 imm:$imm), + (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm))>; + +// Single precision float constants: +def : Pat<(f32 fpimm:$imm), + (IOHLf32 (ILHUf32 (HI16_f32 fpimm:$imm)), (LO16_f32 fpimm:$imm))>; + +// General constant 32-bit vectors +def : Pat<(v4i32 v4i32Imm:$imm), + (IOHLv4i32 (v4i32 (ILHUv4i32 (HI16_vec v4i32Imm:$imm))), + (LO16_vec v4i32Imm:$imm))>; + +// 8-bit constants +def : Pat<(i8 imm:$imm), + (ILHr8 imm:$imm)>; + +//===----------------------------------------------------------------------===// +// Zero/Any/Sign extensions +//===----------------------------------------------------------------------===// + +// sext 8->32: Sign extend bytes to words +def : Pat<(sext_inreg R32C:$rSrc, i8), + (XSHWr32 (XSBHr32 R32C:$rSrc))>; + +def : Pat<(i32 (sext R8C:$rSrc)), + (XSHWr16 (XSBHr8 R8C:$rSrc))>; + +// sext 8->64: Sign extend bytes to double word +def : Pat<(sext_inreg R64C:$rSrc, i8), + (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>; + +def : Pat<(i64 (sext R8C:$rSrc)), + (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>; + +// zext 8->16: Zero extend bytes to halfwords +def : Pat<(i16 (zext R8C:$rSrc)), + (ANDHIi8i16 R8C:$rSrc, 0xff)>; + +// zext 8->32: Zero extend bytes to words +def : Pat<(i32 (zext R8C:$rSrc)), + (ANDIi8i32 R8C:$rSrc, 0xff)>; + +// zext 8->64: Zero extend bytes to double words +def : Pat<(i64 (zext R8C:$rSrc)), + (COPY_TO_REGCLASS (SELBv4i32 (ROTQMBYv4i32 + (COPY_TO_REGCLASS + (ANDIi8i32 R8C:$rSrc,0xff), VECREG), + 0x4), + (ILv4i32 0x0), + (FSMBIv4i32 0x0f0f)), R64C)>; + +// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits +def : Pat<(i16 (anyext R8C:$rSrc)), + (ORHIi8i16 R8C:$rSrc, 0)>; + +// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits +def : Pat<(i32 (anyext R8C:$rSrc)), + (COPY_TO_REGCLASS R8C:$rSrc, R32C)>; + +// sext 16->64: Sign extend halfword to double word +def : Pat<(sext_inreg R64C:$rSrc, i16), + (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>; + +def : Pat<(sext R16C:$rSrc), + (XSWDr64 (XSHWr16 R16C:$rSrc))>; + +// zext 16->32: Zero extend halfwords to words +def : Pat<(i32 (zext R16C:$rSrc)), + (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>; + +def : Pat<(i32 (zext (and R16C:$rSrc, 0xf))), + (ANDIi16i32 R16C:$rSrc, 0xf)>; + +def : Pat<(i32 (zext (and R16C:$rSrc, 0xff))), + (ANDIi16i32 R16C:$rSrc, 0xff)>; + +def : Pat<(i32 (zext (and R16C:$rSrc, 0xfff))), + (ANDIi16i32 R16C:$rSrc, 0xfff)>; + +// anyext 16->32: Extend 16->32 bits, irrespective of sign +def : Pat<(i32 (anyext R16C:$rSrc)), + (COPY_TO_REGCLASS R16C:$rSrc, R32C)>; + +//===----------------------------------------------------------------------===// +// Truncates: +// These truncates are for the SPU's supported types (i8, i16, i32). i64 and +// above are custom lowered. 
+//===----------------------------------------------------------------------===// + +def : Pat<(i8 (trunc GPRC:$src)), + (COPY_TO_REGCLASS + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0f0f), 0x0f0f)), R8C)>; + +def : Pat<(i8 (trunc R64C:$src)), + (COPY_TO_REGCLASS + (SHUFBv2i64_m32 + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0707), 0x0707)), R8C)>; + +def : Pat<(i8 (trunc R32C:$src)), + (COPY_TO_REGCLASS + (SHUFBv4i32_m32 + (COPY_TO_REGCLASS R32C:$src, VECREG), + (COPY_TO_REGCLASS R32C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; + +def : Pat<(i8 (trunc R16C:$src)), + (COPY_TO_REGCLASS + (SHUFBv4i32_m32 + (COPY_TO_REGCLASS R16C:$src, VECREG), + (COPY_TO_REGCLASS R16C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0303), 0x0303)), R8C)>; + +def : Pat<(i16 (trunc GPRC:$src)), + (COPY_TO_REGCLASS + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0e0f), 0x0e0f)), R16C)>; + +def : Pat<(i16 (trunc R64C:$src)), + (COPY_TO_REGCLASS + (SHUFBv2i64_m32 + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0607), 0x0607)), R16C)>; + +def : Pat<(i16 (trunc R32C:$src)), + (COPY_TO_REGCLASS + (SHUFBv4i32_m32 + (COPY_TO_REGCLASS R32C:$src, VECREG), + (COPY_TO_REGCLASS R32C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0203), 0x0203)), R16C)>; + +def : Pat<(i32 (trunc GPRC:$src)), + (COPY_TO_REGCLASS + (SHUFBgprc GPRC:$src, GPRC:$src, + (IOHLv4i32 (ILHUv4i32 0x0c0d), 0x0e0f)), R32C)>; + +def : Pat<(i32 (trunc R64C:$src)), + (COPY_TO_REGCLASS + (SHUFBv2i64_m32 + (COPY_TO_REGCLASS R64C:$src, VECREG), + (COPY_TO_REGCLASS R64C:$src, VECREG), + (IOHLv4i32 (ILHUv4i32 0x0405), 0x0607)), R32C)>; + +//===----------------------------------------------------------------------===// +// Address generation: SPU, like PPC, has to split addresses into high and +// low parts in order to load them into a register. 
+//===----------------------------------------------------------------------===// + +def : Pat<(SPUaform tglobaladdr:$in, 0), (ILAlsa tglobaladdr:$in)>; +def : Pat<(SPUaform texternalsym:$in, 0), (ILAlsa texternalsym:$in)>; +def : Pat<(SPUaform tjumptable:$in, 0), (ILAlsa tjumptable:$in)>; +def : Pat<(SPUaform tconstpool:$in, 0), (ILAlsa tconstpool:$in)>; + +def : Pat<(SPUindirect (SPUhi tglobaladdr:$in, 0), + (SPUlo tglobaladdr:$in, 0)), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; + +def : Pat<(SPUindirect (SPUhi texternalsym:$in, 0), + (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(SPUindirect (SPUhi tjumptable:$in, 0), + (SPUlo tjumptable:$in, 0)), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; + +def : Pat<(SPUindirect (SPUhi tconstpool:$in, 0), + (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; + +def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)), + (IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>; + +def : Pat<(add (SPUhi texternalsym:$in, 0), (SPUlo texternalsym:$in, 0)), + (IOHLlo (ILHUhi texternalsym:$in), texternalsym:$in)>; + +def : Pat<(add (SPUhi tjumptable:$in, 0), (SPUlo tjumptable:$in, 0)), + (IOHLlo (ILHUhi tjumptable:$in), tjumptable:$in)>; + +def : Pat<(add (SPUhi tconstpool:$in, 0), (SPUlo tconstpool:$in, 0)), + (IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>; + +// Intrinsics: +include "CellSDKIntrinsics.td" +// Various math operator instruction sequences +include "SPUMathInstr.td" +// 64-bit "instructions"/support +include "SPU64InstrInfo.td" +// 128-bit "instructions"/support +include "SPU128InstrInfo.td" diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp new file mode 100644 index 000000000000..3e948d071d63 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.cpp @@ -0,0 +1,14 @@ +//==-- SPUMachineFunctionInfo.cpp - Private data used for CellSPU ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "SPUMachineFunction.h" + +using namespace llvm; + +void SPUFunctionInfo::anchor() { } diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h new file mode 100644 index 000000000000..399684bb0887 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUMachineFunction.h @@ -0,0 +1,50 @@ +//===-- SPUMachineFunctionInfo.h - Private data used for CellSPU --*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the IBM Cell SPU specific subclass of MachineFunctionInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_MACHINE_FUNCTION_INFO_H +#define SPU_MACHINE_FUNCTION_INFO_H + +#include "llvm/CodeGen/MachineFunction.h" + +namespace llvm { + +/// SPUFunctionInfo - Cell SPU target-specific information for each +/// MachineFunction +class SPUFunctionInfo : public MachineFunctionInfo { + virtual void anchor(); + + /// UsesLR - Indicates whether LR is used in the current function. 
+ /// + bool UsesLR; + + // VarArgsFrameIndex - FrameIndex for start of varargs area. + int VarArgsFrameIndex; + +public: + SPUFunctionInfo(MachineFunction& MF) + : UsesLR(false), + VarArgsFrameIndex(0) + {} + + void setUsesLR(bool U) { UsesLR = U; } + bool usesLR() { return UsesLR; } + + int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } + void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } +}; + +} // end of namespace llvm + + +#endif + diff --git a/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td new file mode 100644 index 000000000000..9a5c3976afbe --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUMathInstr.td @@ -0,0 +1,97 @@ +//===-- SPUMathInst.td - Cell SPU math operations ---------*- tablegen -*--===// +// +// Cell SPU math operations +// +// This target description file contains instruction sequences for various +// math operations, such as vector multiplies, i32 multiply, etc., for the +// SPU's i32, i16 i8 and corresponding vector types. +// +// Any resemblance to libsimdmath or the Cell SDK simdmath library is +// purely and completely coincidental. +//===----------------------------------------------------------------------===// + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v16i8 multiply instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)), + (ORv4i32 + (ANDv4i32 + (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB), + (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8), + (ROTMAHIv8i16 VECREG:$rB, 8)), 8), + (FSMBIv8i16 0x2222)), + (ILAv4i32 0x0000ffff)), + (SHLIv4i32 + (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16), + (ROTMAIv4i32_i32 VECREG:$rB, 16)), + (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8), + (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8), + (FSMBIv8i16 0x2222)), 16))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v8i16 multiply instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB), + (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16), + (FSMBIv8i16 0xcccc))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// v4i32, i32 multiply instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +def MPYv4i32: + Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (Av4i32 + (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)), + (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))), + (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>; + +def MPYi32: + Pat<(mul R32C:$rA, R32C:$rB), + (Ar32 + (Ar32 (MPYHr32 R32C:$rA, R32C:$rB), + (MPYHr32 R32C:$rB, R32C:$rA)), + (MPYUr32 R32C:$rA, R32C:$rB))>; + +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ +// f32, v4f32 divide instruction sequence: +//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ + +// Reciprocal estimate and interpolation +def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>; +// Division estimate +def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>; +// Newton-Raphson iteration +def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA), + Interpf32.Fragment, + DivEstf32.Fragment)>; +// Epsilon addition +def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>; + +def : 
Pat<(fdiv R32FP:$rA, R32FP:$rB), + (SELBf32_cond NRaphf32.Fragment, + Epsilonf32.Fragment, + (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>; + +// Reciprocal estimate and interpolation +def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>; +// Division estimate +def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>; +// Newton-Raphson iteration +def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment, + (v4f32 VECREG:$rB), + (v4f32 VECREG:$rA)), + Interpv4f32.Fragment, + DivEstv4f32.Fragment)>; +// Epsilon addition +def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>; + +def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)), + (SELBv4f32_cond NRaphv4f32.Fragment, + Epsilonv4f32.Fragment, + (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB), + Epsilonv4f32.Fragment, + (v4f32 VECREG:$rA)), -1))>; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNodes.td b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td new file mode 100644 index 000000000000..a47e9ef0167c --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUNodes.td @@ -0,0 +1,159 @@ +//=== SPUNodes.td - Specialized SelectionDAG nodes by CellSPU -*- tablegen -*-// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Type profiles and SelectionDAG nodes used by CellSPU +// +//===----------------------------------------------------------------------===// + +// Type profile for a call sequence +def SDT_SPUCallSeq : SDTypeProfile<0, 1, [ SDTCisVT<0, i32> ]>; + +// SPU_GenControl: Type profile for generating control words for insertions +def SPU_GenControl : SDTypeProfile<1, 1, []>; +def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; + +def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, + [SDNPHasChain, SDNPOutGlue]>; +def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; +//===----------------------------------------------------------------------===// +// Operand constraints: +//===----------------------------------------------------------------------===// + +def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +// Operand type constraints for vector shuffle/permute operations +def SDT_SPUshuffle : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; + +// Vector binary operator type constraints (needs a further constraint to +// ensure that operand 0 is a vector...): + +def SPUVecBinop: SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> +]>; + +// Trinary operators, e.g., addx, carry generate +def SPUIntTrinaryOp : SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0> +]>; + +// SELECT_MASK type constraints: There are several variations for the various +// vector types (this avoids having to bit_convert all over the place.) 
+def SPUselmask_type: SDTypeProfile<1, 1, [ + SDTCisInt<1> +]>; + +// SELB type constraints: +def SPUselb_type: SDTypeProfile<1, 3, [ + SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<0, 3> ]>; + +// SPU Vector shift pseudo-instruction type constraints +def SPUvecshift_type: SDTypeProfile<1, 2, [ + SDTCisSameAs<0, 1>, SDTCisInt<2>]>; + +// "marker" type for i64 operators that need a shuffle mask +// (i.e., uses cg or bg or another instruction that needs to +// use shufb to get things in the right place.) +// Op0: The result +// Op1, 2: LHS, RHS +// Op3: Carry-generate shuffle mask + +def SPUmarker_type : SDTypeProfile<1, 3, [ + SDTCisInt<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> ]>; + +//===----------------------------------------------------------------------===// +// Synthetic/pseudo-instructions +//===----------------------------------------------------------------------===// + +// SPU CNTB: +def SPUcntb : SDNode<"SPUISD::CNTB", SDTIntUnaryOp>; + +// SPU vector shuffle node, matched by the SPUISD::SHUFB enum (see +// SPUISelLowering.h): +def SPUshuffle: SDNode<"SPUISD::SHUFB", SDT_SPUshuffle, []>; + +// Vector shifts (ISD::SHL,SRL,SRA are for _integers_ only): +def SPUvec_shl: SDNode<"ISD::SHL", SPUvecshift_type, []>; +def SPUvec_srl: SDNode<"ISD::SRL", SPUvecshift_type, []>; +def SPUvec_sra: SDNode<"ISD::SRA", SPUvecshift_type, []>; + +def SPUvec_rotl: SDNode<"SPUISD::VEC_ROTL", SPUvecshift_type, []>; +def SPUvec_rotr: SDNode<"SPUISD::VEC_ROTR", SPUvecshift_type, []>; + +// Vector rotate left, bits shifted out of the left are rotated in on the right +def SPUrotbytes_left: SDNode<"SPUISD::ROTBYTES_LEFT", + SPUvecshift_type, []>; + +// Vector rotate left by bytes, but the count is given in bits and the SPU +// internally converts it to bytes (saves an instruction to mask off lower +// three bits) +def SPUrotbytes_left_bits : SDNode<"SPUISD::ROTBYTES_LEFT_BITS", + SPUvecshift_type>; + +// Shift entire quad left by bytes/bits. 
Zeros are shifted in on the right +// SHL_BITS the same as SHL for i128, but ISD::SHL is not implemented for i128 +def SPUshlquad_l_bytes: SDNode<"SPUISD::SHL_BYTES", SPUvecshift_type, []>; +def SPUshlquad_l_bits: SDNode<"SPUISD::SHL_BITS", SPUvecshift_type, []>; +def SPUsrl_bytes: SDNode<"SPUISD::SRL_BYTES", SPUvecshift_type, []>; + +// SPU form select mask for bytes, immediate +def SPUselmask: SDNode<"SPUISD::SELECT_MASK", SPUselmask_type, []>; + +// SPU select bits instruction +def SPUselb: SDNode<"SPUISD::SELB", SPUselb_type, []>; + +def SDTprefslot2vec: SDTypeProfile<1, 1, []>; +def SPUprefslot2vec: SDNode<"SPUISD::PREFSLOT2VEC", SDTprefslot2vec, []>; + +def SPU_vec_demote : SDTypeProfile<1, 1, []>; +def SPUvec2prefslot: SDNode<"SPUISD::VEC2PREFSLOT", SPU_vec_demote, []>; + +// Address high and low components, used for [r+r] type addressing +def SPUhi : SDNode<"SPUISD::Hi", SDTIntBinOp, []>; +def SPUlo : SDNode<"SPUISD::Lo", SDTIntBinOp, []>; + +// PC-relative address +def SPUpcrel : SDNode<"SPUISD::PCRelAddr", SDTIntBinOp, []>; + +// A-Form local store addresses +def SPUaform : SDNode<"SPUISD::AFormAddr", SDTIntBinOp, []>; + +// Indirect [D-Form "imm($reg)" and X-Form "$reg($reg)"] addresses +def SPUindirect : SDNode<"SPUISD::IndirectAddr", SDTIntBinOp, []>; + +// i64 markers: supplies extra operands used to generate the i64 operator +// instruction sequences +def SPUadd64 : SDNode<"SPUISD::ADD64_MARKER", SPUmarker_type, []>; +def SPUsub64 : SDNode<"SPUISD::SUB64_MARKER", SPUmarker_type, []>; +def SPUmul64 : SDNode<"SPUISD::MUL64_MARKER", SPUmarker_type, []>; + +//===----------------------------------------------------------------------===// +// Constraints: (taken from PPCInstrInfo.td) +//===----------------------------------------------------------------------===// + +class RegConstraint<string C> { + string Constraints = C; +} + +class NoEncode<string E> { + string DisableEncoding = E; +} + +//===----------------------------------------------------------------------===// +// Return (flag isn't quite what it means: the operations are flagged so that +// instruction scheduling doesn't disassociate them.) +//===----------------------------------------------------------------------===// + +def retflag : SDNode<"SPUISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; diff --git a/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp new file mode 100644 index 000000000000..7c58041e3b84 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUNopFiller.cpp @@ -0,0 +1,153 @@ +//===-- SPUNopFiller.cpp - Add nops/lnops to align the pipelines ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The final pass just before assembly printing. This pass is the last +// checkpoint where nops and lnops are added to the instruction stream to +// satisfy the dual issue requirements. 
The actual dual issue scheduling is
+// done (TODO: nowhere, currently)
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "SPUTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+  struct SPUNopFiller : public MachineFunctionPass {
+
+    TargetMachine &TM;
+    const TargetInstrInfo *TII;
+    const InstrItineraryData *IID;
+    bool isEvenPlace;  // the instruction slot (mem address) at hand is even/odd
+
+    static char ID;
+    SPUNopFiller(TargetMachine &tm)
+      : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()),
+        IID(tm.getInstrItineraryData())
+    {
+      DEBUG( dbgs() << "********** SPU Nop filler **********\n" ; );
+    }
+
+    virtual const char *getPassName() const {
+      return "SPU nop/lnop Filler";
+    }
+
+    void runOnMachineBasicBlock(MachineBasicBlock &MBB);
+
+    bool runOnMachineFunction(MachineFunction &F) {
+      isEvenPlace = true; // all functions get an .align 3 directive at start
+      for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+           FI != FE; ++FI)
+        runOnMachineBasicBlock(*FI);
+      return true; // never-ever do any more modifications, just print it!
+    }
+
+    typedef enum { none   = 0, // no more instructions in this function / BB
+                   pseudo = 1, // this does not get executed
+                   even   = 2,
+                   odd    = 3 } SPUOpPlace;
+    SPUOpPlace getOpPlacement( MachineInstr &instr );
+
+  };
+  char SPUNopFiller::ID = 0;
+
+}
+
+// Fill a BasicBlock to alignment.
+// In the assembly we align the functions to 'even' addresses, but
+// basic blocks have an implicit alignment. We hereby define
+// basic blocks to have the same, even, alignment.
+void SPUNopFiller::
+runOnMachineBasicBlock(MachineBasicBlock &MBB)
+{
+  assert( isEvenPlace && "basic block starts at an odd address");
+  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+  {
+    SPUOpPlace this_optype, next_optype;
+    MachineBasicBlock::iterator J = I;
+    J++;
+
+    this_optype = getOpPlacement( *I );
+    next_optype = none;
+    while (J!=MBB.end()){
+      next_optype = getOpPlacement( *J );
+      ++J;
+      if (next_optype != pseudo )
+        break;
+    }
+
+    // pad: odd(wrong), even(wrong), ...
+    // to:  nop(corr), odd(corr), even(corr)...
+    if( isEvenPlace && this_optype == odd && next_optype == even ) {
+      DEBUG( dbgs() <<"Adding NOP before: "; );
+      DEBUG( I->dump(); );
+      BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::ENOP));
+      isEvenPlace=false;
+    }
+
+    // pad: even(wrong), odd(wrong), ...
+    // to:  lnop(corr), even(corr), odd(corr)...
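+    // Example: at an odd slot, an even-pipeline instruction followed by an
+    // odd-pipeline one would land in the wrong slots; the LNOP issued below
+    // occupies the odd slot (it is the odd-pipeline no-op), pushing the
+    // even-pipeline instruction back onto an even address.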
+ else if ( !isEvenPlace && this_optype == even && next_optype == odd){ + DEBUG( dbgs() <<"Adding LNOP before: "; ); + DEBUG( I->dump(); ); + BuildMI(MBB, I, I->getDebugLoc(), TII->get(SPU::LNOP)); + isEvenPlace=true; + } + + // now go to next mem slot + if( this_optype != pseudo ) + isEvenPlace = !isEvenPlace; + + } + + // padd basicblock end + if( !isEvenPlace ){ + MachineBasicBlock::iterator J = MBB.end(); + J--; + if (getOpPlacement( *J ) == odd) { + DEBUG( dbgs() <<"Padding basic block with NOP\n"; ); + BuildMI(MBB, J, J->getDebugLoc(), TII->get(SPU::ENOP)); + } + else { + J++; + DEBUG( dbgs() <<"Padding basic block with LNOP\n"; ); + BuildMI(MBB, J, DebugLoc(), TII->get(SPU::LNOP)); + } + isEvenPlace=true; + } +} + +FunctionPass *llvm::createSPUNopFillerPass(SPUTargetMachine &tm) { + return new SPUNopFiller(tm); +} + +// Figure out if 'instr' is executed in the even or odd pipeline +SPUNopFiller::SPUOpPlace +SPUNopFiller::getOpPlacement( MachineInstr &instr ) { + int sc = instr.getDesc().getSchedClass(); + const InstrStage *stage = IID->beginStage(sc); + unsigned FUs = stage->getUnits(); + SPUOpPlace retval; + + switch( FUs ) { + case 0: retval = pseudo; break; + case 1: retval = odd; break; + case 2: retval = even; break; + default: retval= pseudo; + assert( false && "got unknown FuncUnit\n"); + break; + }; + return retval; +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPUOperands.td b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td new file mode 100644 index 000000000000..6f8deef5530f --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUOperands.td @@ -0,0 +1,664 @@ +//===-- SPUOperands.td - Cell SPU Instruction Operands -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Cell SPU Instruction Operands: +//===----------------------------------------------------------------------===// + +// TO_IMM32 - Convert an i8/i16 to i32. +def TO_IMM32 : SDNodeXForm<imm, [{ + return getI32Imm(N->getZExtValue()); +}]>; + +// TO_IMM16 - Convert an i8/i32 to i16. +def TO_IMM16 : SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i16); +}]>; + + +def LO16 : SDNodeXForm<imm, [{ + unsigned val = N->getZExtValue(); + // Transformation function: get the low 16 bits. + return getI32Imm(val & 0xffff); +}]>; + +def LO16_vec : SDNodeXForm<scalar_to_vector, [{ + SDValue OpVal(0, 0); + + // Transformation function: get the low 16 bit immediate from a build_vector + // node. + assert(N->getOpcode() == ISD::BUILD_VECTOR + && "LO16_vec got something other than a BUILD_VECTOR"); + + // Get first constant operand... + for (unsigned i = 0, e = N->getNumOperands(); + OpVal.getNode() == 0 && i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (OpVal.getNode() == 0) + OpVal = N->getOperand(i); + } + + assert(OpVal.getNode() != 0 && "LO16_vec did not locate a <defined> node"); + ConstantSDNode *CN = cast<ConstantSDNode>(OpVal); + return getI32Imm((unsigned)CN->getZExtValue() & 0xffff); +}]>; + +// Transform an immediate, returning the high 16 bits shifted down: +def HI16 : SDNodeXForm<imm, [{ + return getI32Imm((unsigned)N->getZExtValue() >> 16); +}]>; + +// Transformation function: shift the high 16 bit immediate from a build_vector +// node into the low 16 bits, and return a 16-bit constant. 
+def HI16_vec : SDNodeXForm<scalar_to_vector, [{ + SDValue OpVal(0, 0); + + assert(N->getOpcode() == ISD::BUILD_VECTOR + && "HI16_vec got something other than a BUILD_VECTOR"); + + // Get first constant operand... + for (unsigned i = 0, e = N->getNumOperands(); + OpVal.getNode() == 0 && i != e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (OpVal.getNode() == 0) + OpVal = N->getOperand(i); + } + + assert(OpVal.getNode() != 0 && "HI16_vec did not locate a <defined> node"); + ConstantSDNode *CN = cast<ConstantSDNode>(OpVal); + return getI32Imm((unsigned)CN->getZExtValue() >> 16); +}]>; + +// simm7 predicate - True if the immediate fits in an 7-bit signed +// field. +def simm7: PatLeaf<(imm), [{ + int sextVal = int(N->getSExtValue()); + return (sextVal >= -64 && sextVal <= 63); +}]>; + +// uimm7 predicate - True if the immediate fits in an 7-bit unsigned +// field. +def uimm7: PatLeaf<(imm), [{ + return (N->getZExtValue() <= 0x7f); +}]>; + +// immSExt8 predicate - True if the immediate fits in an 8-bit sign extended +// field. +def immSExt8 : PatLeaf<(imm), [{ + int Value = int(N->getSExtValue()); + return (Value >= -(1 << 8) && Value <= (1 << 8) - 1); +}]>; + +// immU8: immediate, unsigned 8-bit quantity +def immU8 : PatLeaf<(imm), [{ + return (N->getZExtValue() <= 0xff); +}]>; + +// i32ImmSExt10 predicate - True if the i32 immediate fits in a 10-bit sign +// extended field. Used by RI10Form instructions like 'ldq'. +def i32ImmSExt10 : PatLeaf<(imm), [{ + return isI32IntS10Immediate(N); +}]>; + +// i32ImmUns10 predicate - True if the i32 immediate fits in a 10-bit unsigned +// field. Used by RI10Form instructions like 'ldq'. +def i32ImmUns10 : PatLeaf<(imm), [{ + return isI32IntU10Immediate(N); +}]>; + +// i16ImmSExt10 predicate - True if the i16 immediate fits in a 10-bit sign +// extended field. Used by RI10Form instructions like 'ldq'. +def i16ImmSExt10 : PatLeaf<(imm), [{ + return isI16IntS10Immediate(N); +}]>; + +// i16ImmUns10 predicate - True if the i16 immediate fits into a 10-bit unsigned +// value. Used by RI10Form instructions. +def i16ImmUns10 : PatLeaf<(imm), [{ + return isI16IntU10Immediate(N); +}]>; + +def immSExt16 : PatLeaf<(imm), [{ + // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended + // field. + short Ignored; + return isIntS16Immediate(N, Ignored); +}]>; + +def immZExt16 : PatLeaf<(imm), [{ + // immZExt16 predicate - True if the immediate fits in a 16-bit zero extended + // field. + return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue(); +}], LO16>; + +def immU16 : PatLeaf<(imm), [{ + // immU16 predicate- True if the immediate fits into a 16-bit unsigned field. + return (uint64_t)N->getZExtValue() == (N->getZExtValue() & 0xffff); +}]>; + +def imm18 : PatLeaf<(imm), [{ + // imm18 predicate: True if the immediate fits into an 18-bit unsigned field. 
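+  // (18 bits is the widest immediate the SPU loads in a single instruction,
+  // e.g. with ILA.)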
+ int Value = (int) N->getZExtValue(); + return isUInt<18>(Value); +}]>; + +def lo16 : PatLeaf<(imm), [{ + // lo16 predicate - returns true if the immediate has all zeros in the + // low order bits and is a 32-bit constant: + if (N->getValueType(0) == MVT::i32) { + uint32_t val = N->getZExtValue(); + return ((val & 0x0000ffff) == val); + } + + return false; +}], LO16>; + +def hi16 : PatLeaf<(imm), [{ + // hi16 predicate - returns true if the immediate has all zeros in the + // low order bits and is a 32-bit constant: + if (N->getValueType(0) == MVT::i32) { + uint32_t val = uint32_t(N->getZExtValue()); + return ((val & 0xffff0000) == val); + } else if (N->getValueType(0) == MVT::i64) { + uint64_t val = N->getZExtValue(); + return ((val & 0xffff0000ULL) == val); + } + + return false; +}], HI16>; + +def bitshift : PatLeaf<(imm), [{ + // bitshift predicate - returns true if 0 < imm <= 7 for SHLQBII + // (shift left quadword by bits immediate) + int64_t Val = N->getZExtValue(); + return (Val > 0 && Val <= 7); +}]>; + +//===----------------------------------------------------------------------===// +// Floating point operands: +//===----------------------------------------------------------------------===// + +// Transform a float, returning the high 16 bits shifted down, as if +// the float was really an unsigned integer: +def HI16_f32 : SDNodeXForm<fpimm, [{ + float fval = N->getValueAPF().convertToFloat(); + return getI32Imm(FloatToBits(fval) >> 16); +}]>; + +// Transformation function on floats: get the low 16 bits as if the float was +// an unsigned integer. +def LO16_f32 : SDNodeXForm<fpimm, [{ + float fval = N->getValueAPF().convertToFloat(); + return getI32Imm(FloatToBits(fval) & 0xffff); +}]>; + +def FPimm_sext16 : SDNodeXForm<fpimm, [{ + float fval = N->getValueAPF().convertToFloat(); + return getI32Imm((int) ((FloatToBits(fval) << 16) >> 16)); +}]>; + +def FPimm_u18 : SDNodeXForm<fpimm, [{ + float fval = N->getValueAPF().convertToFloat(); + return getI32Imm(FloatToBits(fval) & ((1 << 18) - 1)); +}]>; + +def fpimmSExt16 : PatLeaf<(fpimm), [{ + short Ignored; + return isFPS16Immediate(N, Ignored); +}], FPimm_sext16>; + +// Does the SFP constant only have upp 16 bits set? +def hi16_f32 : PatLeaf<(fpimm), [{ + if (N->getValueType(0) == MVT::f32) { + uint32_t val = FloatToBits(N->getValueAPF().convertToFloat()); + return ((val & 0xffff0000) == val); + } + + return false; +}], HI16_f32>; + +// Does the SFP constant fit into 18 bits? +def fpimm18 : PatLeaf<(fpimm), [{ + if (N->getValueType(0) == MVT::f32) { + uint32_t Value = FloatToBits(N->getValueAPF().convertToFloat()); + return isUInt<18>(Value); + } + + return false; +}], FPimm_u18>; + +//===----------------------------------------------------------------------===// +// 64-bit operands (TODO): +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// build_vector operands: +//===----------------------------------------------------------------------===// + +// v16i8SExt8Imm_xform function: convert build_vector to 8-bit sign extended +// immediate constant load for v16i8 vectors. N.B.: The incoming constant has +// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). 
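+// (The byte splat is emitted as a halfword immediate load, which is why the
+// value must appear duplicated in both bytes of the halfword.)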
+def v16i8SExt8Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8); +}]>; + +// v16i8SExt8Imm: Predicate test for 8-bit sign extended immediate constant +// load, works in conjunction with its transform function. N.B.: This relies the +// incoming constant being a 16-bit quantity, where the upper and lower bytes +// are EXACTLY the same (e.g., 0x2a2a) +def v16i8SExt8Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0; +}], v16i8SExt8Imm_xform>; + +// v16i8U8Imm_xform function: convert build_vector to unsigned 8-bit +// immediate constant load for v16i8 vectors. N.B.: The incoming constant has +// to be a 16-bit quantity with the upper and lower bytes equal (e.g., 0x2a2a). +def v16i8U8Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8); +}]>; + +// v16i8U8Imm: Predicate test for unsigned 8-bit immediate constant +// load, works in conjunction with its transform function. N.B.: This relies the +// incoming constant being a 16-bit quantity, where the upper and lower bytes +// are EXACTLY the same (e.g., 0x2a2a) +def v16i8U8Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i8).getNode() != 0; +}], v16i8U8Imm_xform>; + +// v8i16SExt8Imm_xform function: convert build_vector to 8-bit sign extended +// immediate constant load for v8i16 vectors. +def v8i16SExt8Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16); +}]>; + +// v8i16SExt8Imm: Predicate test for 8-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v8i16SExt8Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i8imm(N, *CurDAG, MVT::i16).getNode() != 0; +}], v8i16SExt8Imm_xform>; + +// v8i16SExt10Imm_xform function: convert build_vector to 16-bit sign extended +// immediate constant load for v8i16 vectors. +def v8i16SExt10Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16); +}]>; + +// v8i16SExt10Imm: Predicate test for 16-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v8i16SExt10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0; +}], v8i16SExt10Imm_xform>; + +// v8i16Uns10Imm_xform function: convert build_vector to 16-bit unsigned +// immediate constant load for v8i16 vectors. +def v8i16Uns10Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16); +}]>; + +// v8i16Uns10Imm: Predicate test for 16-bit unsigned immediate constant +// load, works in conjunction with its transform function. +def v8i16Uns10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i16).getNode() != 0; +}], v8i16Uns10Imm_xform>; + +// v8i16SExt16Imm_xform function: convert build_vector to 16-bit sign extended +// immediate constant load for v8i16 vectors. +def v8i16Uns16Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16); +}]>; + +// v8i16SExt16Imm: Predicate test for 16-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v8i16SExt16Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i16).getNode() != 0; +}], v8i16Uns16Imm_xform>; + +// v4i32SExt10Imm_xform function: convert build_vector to 10-bit sign extended +// immediate constant load for v4i32 vectors. 
+def v4i32SExt10Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32); +}]>; + +// v4i32SExt10Imm: Predicate test for 10-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v4i32SExt10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0; +}], v4i32SExt10Imm_xform>; + +// v4i32Uns10Imm_xform function: convert build_vector to 10-bit unsigned +// immediate constant load for v4i32 vectors. +def v4i32Uns10Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32); +}]>; + +// v4i32Uns10Imm: Predicate test for 10-bit unsigned immediate constant +// load, works in conjunction with its transform function. +def v4i32Uns10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i32).getNode() != 0; +}], v4i32Uns10Imm_xform>; + +// v4i32SExt16Imm_xform function: convert build_vector to 16-bit sign extended +// immediate constant load for v4i32 vectors. +def v4i32SExt16Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32); +}]>; + +// v4i32SExt16Imm: Predicate test for 16-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v4i32SExt16Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i32).getNode() != 0; +}], v4i32SExt16Imm_xform>; + +// v4i32Uns18Imm_xform function: convert build_vector to 18-bit unsigned +// immediate constant load for v4i32 vectors. +def v4i32Uns18Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32); +}]>; + +// v4i32Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, +// works in conjunction with its transform function. +def v4i32Uns18Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_u18imm(N, *CurDAG, MVT::i32).getNode() != 0; +}], v4i32Uns18Imm_xform>; + +// ILHUvec_get_imm xform function: convert build_vector to ILHUvec imm constant +// load. +def ILHUvec_get_imm: SDNodeXForm<build_vector, [{ + return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32); +}]>; + +/// immILHUvec: Predicate test for a ILHU constant vector. +def immILHUvec: PatLeaf<(build_vector), [{ + return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i32).getNode() != 0; +}], ILHUvec_get_imm>; + +// Catch-all for any other i32 vector constants +def v4i32_get_imm: SDNodeXForm<build_vector, [{ + return SPU::get_v4i32_imm(N, *CurDAG); +}]>; + +def v4i32Imm: PatLeaf<(build_vector), [{ + return SPU::get_v4i32_imm(N, *CurDAG).getNode() != 0; +}], v4i32_get_imm>; + +// v2i64SExt10Imm_xform function: convert build_vector to 10-bit sign extended +// immediate constant load for v2i64 vectors. +def v2i64SExt10Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64); +}]>; + +// v2i64SExt10Imm: Predicate test for 10-bit sign extended immediate constant +// load, works in conjunction with its transform function. +def v2i64SExt10Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i10imm(N, *CurDAG, MVT::i64).getNode() != 0; +}], v2i64SExt10Imm_xform>; + +// v2i64SExt16Imm_xform function: convert build_vector to 16-bit sign extended +// immediate constant load for v2i64 vectors. +def v2i64SExt16Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64); +}]>; + +// v2i64SExt16Imm: Predicate test for 16-bit sign extended immediate constant +// load, works in conjunction with its transform function. 
+def v2i64SExt16Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_i16imm(N, *CurDAG, MVT::i64).getNode() != 0; +}], v2i64SExt16Imm_xform>; + +// v2i64Uns18Imm_xform function: convert build_vector to 18-bit unsigned +// immediate constant load for v2i64 vectors. +def v2i64Uns18Imm_xform: SDNodeXForm<build_vector, [{ + return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64); +}]>; + +// v2i64Uns18Imm: Predicate test for 18-bit unsigned immediate constant load, +// works in conjunction with its transform function. +def v2i64Uns18Imm: PatLeaf<(build_vector), [{ + return SPU::get_vec_u18imm(N, *CurDAG, MVT::i64).getNode() != 0; +}], v2i64Uns18Imm_xform>; + +/// immILHUvec: Predicate test for a ILHU constant vector. +def immILHUvec_i64: PatLeaf<(build_vector), [{ + return SPU::get_ILHUvec_imm(N, *CurDAG, MVT::i64).getNode() != 0; +}], ILHUvec_get_imm>; + +// Catch-all for any other i32 vector constants +def v2i64_get_imm: SDNodeXForm<build_vector, [{ + return SPU::get_v2i64_imm(N, *CurDAG); +}]>; + +def v2i64Imm: PatLeaf<(build_vector), [{ + return SPU::get_v2i64_imm(N, *CurDAG).getNode() != 0; +}], v2i64_get_imm>; + +//===----------------------------------------------------------------------===// +// Operand Definitions. + +def s7imm: Operand<i8> { + let PrintMethod = "printS7ImmOperand"; +} + +def s7imm_i8: Operand<i8> { + let PrintMethod = "printS7ImmOperand"; +} + +def u7imm: Operand<i16> { + let PrintMethod = "printU7ImmOperand"; +} + +def u7imm_i8: Operand<i8> { + let PrintMethod = "printU7ImmOperand"; +} + +def u7imm_i32: Operand<i32> { + let PrintMethod = "printU7ImmOperand"; +} + +// Halfword, signed 10-bit constant +def s10imm : Operand<i16> { + let PrintMethod = "printS10ImmOperand"; +} + +def s10imm_i8: Operand<i8> { + let PrintMethod = "printS10ImmOperand"; +} + +def s10imm_i32: Operand<i32> { + let PrintMethod = "printS10ImmOperand"; +} + +def s10imm_i64: Operand<i64> { + let PrintMethod = "printS10ImmOperand"; +} + +// Unsigned 10-bit integers: +def u10imm: Operand<i16> { + let PrintMethod = "printU10ImmOperand"; +} + +def u10imm_i8: Operand<i8> { + let PrintMethod = "printU10ImmOperand"; +} + +def u10imm_i32: Operand<i32> { + let PrintMethod = "printU10ImmOperand"; +} + +def s16imm : Operand<i16> { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_i8: Operand<i8> { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_i32: Operand<i32> { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_i64: Operand<i64> { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_f32: Operand<f32> { + let PrintMethod = "printS16ImmOperand"; +} + +def s16imm_f64: Operand<f64> { + let PrintMethod = "printS16ImmOperand"; +} + +def u16imm_i64 : Operand<i64> { + let PrintMethod = "printU16ImmOperand"; +} + +def u16imm_i32 : Operand<i32> { + let PrintMethod = "printU16ImmOperand"; +} + +def u16imm : Operand<i16> { + let PrintMethod = "printU16ImmOperand"; +} + +def f16imm : Operand<f32> { + let PrintMethod = "printU16ImmOperand"; +} + +def s18imm : Operand<i32> { + let PrintMethod = "printS18ImmOperand"; +} + +def u18imm : Operand<i32> { + let PrintMethod = "printU18ImmOperand"; +} + +def u18imm_i64 : Operand<i64> { + let PrintMethod = "printU18ImmOperand"; +} + +def f18imm : Operand<f32> { + let PrintMethod = "printU18ImmOperand"; +} + +def f18imm_f64 : Operand<f64> { + let PrintMethod = "printU18ImmOperand"; +} + +// Negated 7-bit halfword rotate immediate operands +def rothNeg7imm : Operand<i32> { + let PrintMethod = "printROTHNeg7Imm"; +} + +def rothNeg7imm_i16 : Operand<i16> { 
+ let PrintMethod = "printROTHNeg7Imm"; +} + +// Negated 7-bit word rotate immediate operands +def rotNeg7imm : Operand<i32> { + let PrintMethod = "printROTNeg7Imm"; +} + +def rotNeg7imm_i16 : Operand<i16> { + let PrintMethod = "printROTNeg7Imm"; +} + +def rotNeg7imm_i8 : Operand<i8> { + let PrintMethod = "printROTNeg7Imm"; +} + +def target : Operand<OtherVT> { + let PrintMethod = "printBranchOperand"; +} + +// Absolute address call target +def calltarget : Operand<iPTR> { + let PrintMethod = "printCallOperand"; + let MIOperandInfo = (ops u18imm:$calldest); +} + +// PC relative call target +def relcalltarget : Operand<iPTR> { + let PrintMethod = "printPCRelativeOperand"; + let MIOperandInfo = (ops s16imm:$calldest); +} + +// Branch targets: +def brtarget : Operand<OtherVT> { + let PrintMethod = "printPCRelativeOperand"; +} + +// Hint for branch target +def hbrtarget : Operand<OtherVT> { + let PrintMethod = "printHBROperand"; +} + +// Indirect call target +def indcalltarget : Operand<iPTR> { + let PrintMethod = "printCallOperand"; + let MIOperandInfo = (ops ptr_rc:$calldest); +} + +def symbolHi: Operand<i32> { + let PrintMethod = "printSymbolHi"; +} + +def symbolLo: Operand<i32> { + let PrintMethod = "printSymbolLo"; +} + +def symbolLSA: Operand<i32> { + let PrintMethod = "printSymbolLSA"; +} + +// Shuffle address memory operaand [s7imm(reg) d-format] +def shufaddr : Operand<iPTR> { + let PrintMethod = "printShufAddr"; + let MIOperandInfo = (ops s7imm:$imm, ptr_rc:$reg); +} + +// memory s10imm(reg) operand +def dformaddr : Operand<iPTR> { + let PrintMethod = "printDFormAddr"; + let MIOperandInfo = (ops s10imm:$imm, ptr_rc:$reg); +} + +// 256K local store address +// N.B.: The tblgen code generator expects to have two operands, an offset +// and a pointer. Of these, only the immediate is actually used. +def addr256k : Operand<iPTR> { + let PrintMethod = "printAddr256K"; + let MIOperandInfo = (ops s16imm:$imm, ptr_rc:$reg); +} + +// memory s18imm(reg) operand +def memri18 : Operand<iPTR> { + let PrintMethod = "printMemRegImmS18"; + let MIOperandInfo = (ops s18imm:$imm, ptr_rc:$reg); +} + +// memory register + register operand +def memrr : Operand<iPTR> { + let PrintMethod = "printMemRegReg"; + let MIOperandInfo = (ops ptr_rc:$reg_a, ptr_rc:$reg_b); +} + +// Define SPU-specific addressing modes: These come in three basic +// flavors: +// +// D-form : [r+I10] (10-bit signed offset + reg) +// X-form : [r+r] (reg+reg) +// A-form : abs (256K LSA offset) +// D-form(2): [r+I7] (7-bit signed offset + reg) + +def dform_addr : ComplexPattern<iPTR, 2, "SelectDFormAddr", + [], [SDNPWantRoot]>; +def xform_addr : ComplexPattern<iPTR, 2, "SelectXFormAddr", + [], [SDNPWantRoot]>; +def aform_addr : ComplexPattern<iPTR, 2, "SelectAFormAddr", + [], [SDNPWantRoot]>; +def dform2_addr : ComplexPattern<iPTR, 2, "SelectDForm2Addr", + [], [SDNPWantRoot]>; diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp new file mode 100644 index 000000000000..1b2da5f50c81 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -0,0 +1,356 @@ +//===-- SPURegisterInfo.cpp - Cell SPU Register Information ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the Cell implementation of the TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reginfo" +#include "SPURegisterInfo.h" +#include "SPU.h" +#include "SPUInstrBuilder.h" +#include "SPUSubtarget.h" +#include "SPUMachineFunction.h" +#include "SPUFrameLowering.h" +#include "llvm/Constants.h" +#include "llvm/Type.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include <cstdlib> + +#define GET_REGINFO_TARGET_DESC +#include "SPUGenRegisterInfo.inc" + +using namespace llvm; + +/// getRegisterNumbering - Given the enum value for some register, e.g. +/// PPC::F14, return the number that it corresponds to (e.g. 14). +unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) { + using namespace SPU; + switch (RegEnum) { + case SPU::R0: return 0; + case SPU::R1: return 1; + case SPU::R2: return 2; + case SPU::R3: return 3; + case SPU::R4: return 4; + case SPU::R5: return 5; + case SPU::R6: return 6; + case SPU::R7: return 7; + case SPU::R8: return 8; + case SPU::R9: return 9; + case SPU::R10: return 10; + case SPU::R11: return 11; + case SPU::R12: return 12; + case SPU::R13: return 13; + case SPU::R14: return 14; + case SPU::R15: return 15; + case SPU::R16: return 16; + case SPU::R17: return 17; + case SPU::R18: return 18; + case SPU::R19: return 19; + case SPU::R20: return 20; + case SPU::R21: return 21; + case SPU::R22: return 22; + case SPU::R23: return 23; + case SPU::R24: return 24; + case SPU::R25: return 25; + case SPU::R26: return 26; + case SPU::R27: return 27; + case SPU::R28: return 28; + case SPU::R29: return 29; + case SPU::R30: return 30; + case SPU::R31: return 31; + case SPU::R32: return 32; + case SPU::R33: return 33; + case SPU::R34: return 34; + case SPU::R35: return 35; + case SPU::R36: return 36; + case SPU::R37: return 37; + case SPU::R38: return 38; + case SPU::R39: return 39; + case SPU::R40: return 40; + case SPU::R41: return 41; + case SPU::R42: return 42; + case SPU::R43: return 43; + case SPU::R44: return 44; + case SPU::R45: return 45; + case SPU::R46: return 46; + case SPU::R47: return 47; + case SPU::R48: return 48; + case SPU::R49: return 49; + case SPU::R50: return 50; + case SPU::R51: return 51; + case SPU::R52: return 52; + case SPU::R53: return 53; + case SPU::R54: return 54; + case SPU::R55: return 55; + case SPU::R56: return 56; + case SPU::R57: return 57; + case SPU::R58: return 58; + case SPU::R59: return 59; + case SPU::R60: return 60; + case SPU::R61: return 61; + case SPU::R62: return 62; + case SPU::R63: return 63; + case SPU::R64: return 64; + case SPU::R65: return 65; + case SPU::R66: return 66; + case SPU::R67: return 67; + case SPU::R68: 
return 68; + case SPU::R69: return 69; + case SPU::R70: return 70; + case SPU::R71: return 71; + case SPU::R72: return 72; + case SPU::R73: return 73; + case SPU::R74: return 74; + case SPU::R75: return 75; + case SPU::R76: return 76; + case SPU::R77: return 77; + case SPU::R78: return 78; + case SPU::R79: return 79; + case SPU::R80: return 80; + case SPU::R81: return 81; + case SPU::R82: return 82; + case SPU::R83: return 83; + case SPU::R84: return 84; + case SPU::R85: return 85; + case SPU::R86: return 86; + case SPU::R87: return 87; + case SPU::R88: return 88; + case SPU::R89: return 89; + case SPU::R90: return 90; + case SPU::R91: return 91; + case SPU::R92: return 92; + case SPU::R93: return 93; + case SPU::R94: return 94; + case SPU::R95: return 95; + case SPU::R96: return 96; + case SPU::R97: return 97; + case SPU::R98: return 98; + case SPU::R99: return 99; + case SPU::R100: return 100; + case SPU::R101: return 101; + case SPU::R102: return 102; + case SPU::R103: return 103; + case SPU::R104: return 104; + case SPU::R105: return 105; + case SPU::R106: return 106; + case SPU::R107: return 107; + case SPU::R108: return 108; + case SPU::R109: return 109; + case SPU::R110: return 110; + case SPU::R111: return 111; + case SPU::R112: return 112; + case SPU::R113: return 113; + case SPU::R114: return 114; + case SPU::R115: return 115; + case SPU::R116: return 116; + case SPU::R117: return 117; + case SPU::R118: return 118; + case SPU::R119: return 119; + case SPU::R120: return 120; + case SPU::R121: return 121; + case SPU::R122: return 122; + case SPU::R123: return 123; + case SPU::R124: return 124; + case SPU::R125: return 125; + case SPU::R126: return 126; + case SPU::R127: return 127; + default: + report_fatal_error("Unhandled reg in SPURegisterInfo::getRegisterNumbering"); + } +} + +SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, + const TargetInstrInfo &tii) : + SPUGenRegisterInfo(SPU::R0), Subtarget(subtarget), TII(tii) +{ +} + +/// getPointerRegClass - Return the register class to use to hold pointers. +/// This is used for addressing modes. +const TargetRegisterClass * +SPURegisterInfo::getPointerRegClass(unsigned Kind) const { + return &SPU::R32CRegClass; +} + +const uint16_t * +SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const +{ + // Cell ABI calling convention + static const uint16_t SPU_CalleeSaveRegs[] = { + SPU::R80, SPU::R81, SPU::R82, SPU::R83, + SPU::R84, SPU::R85, SPU::R86, SPU::R87, + SPU::R88, SPU::R89, SPU::R90, SPU::R91, + SPU::R92, SPU::R93, SPU::R94, SPU::R95, + SPU::R96, SPU::R97, SPU::R98, SPU::R99, + SPU::R100, SPU::R101, SPU::R102, SPU::R103, + SPU::R104, SPU::R105, SPU::R106, SPU::R107, + SPU::R108, SPU::R109, SPU::R110, SPU::R111, + SPU::R112, SPU::R113, SPU::R114, SPU::R115, + SPU::R116, SPU::R117, SPU::R118, SPU::R119, + SPU::R120, SPU::R121, SPU::R122, SPU::R123, + SPU::R124, SPU::R125, SPU::R126, SPU::R127, + SPU::R2, /* environment pointer */ + SPU::R1, /* stack pointer */ + SPU::R0, /* link register */ + 0 /* end */ + }; + + return SPU_CalleeSaveRegs; +} + +/*! 
+ R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is + generally unused) are the Cell's reserved registers + */ +BitVector SPURegisterInfo::getReservedRegs(const MachineFunction &MF) const { + BitVector Reserved(getNumRegs()); + Reserved.set(SPU::R0); // LR + Reserved.set(SPU::R1); // SP + Reserved.set(SPU::R2); // environment pointer + return Reserved; +} + +//===----------------------------------------------------------------------===// +// Stack Frame Processing methods +//===----------------------------------------------------------------------===// + +//-------------------------------------------------------------------------- +void +SPURegisterInfo::eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) + const +{ + // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. + MBB.erase(I); +} + +void +SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, + RegScavenger *RS) const +{ + unsigned i = 0; + MachineInstr &MI = *II; + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + DebugLoc dl = II->getDebugLoc(); + + while (!MI.getOperand(i).isFI()) { + ++i; + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); + } + + MachineOperand &SPOp = MI.getOperand(i); + int FrameIndex = SPOp.getIndex(); + + // Now add the frame object offset to the offset from r1. + int Offset = MFI->getObjectOffset(FrameIndex); + + // Most instructions, except for generated FrameIndex additions using AIr32 + // and ILAr32, have the immediate in operand 1. AIr32 and ILAr32 have the + // immediate in operand 2. + unsigned OpNo = 1; + if (MI.getOpcode() == SPU::AIr32 || MI.getOpcode() == SPU::ILAr32) + OpNo = 2; + + MachineOperand &MO = MI.getOperand(OpNo); + + // Offset is biased by $lr's slot at the bottom. 
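+  // The final value is: object offset + original immediate + stack size
+  // + minStackSize() (the fixed area holding $lr's slot), i.e. an offset
+  // from the post-prologue $sp.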
+ Offset += MO.getImm() + MFI->getStackSize() + SPUFrameLowering::minStackSize(); + assert((Offset & 0xf) == 0 + && "16-byte alignment violated in eliminateFrameIndex"); + + // Replace the FrameIndex with base register with $sp (aka $r1) + SPOp.ChangeToRegister(SPU::R1, false); + + // if 'Offset' doesn't fit to the D-form instruction's + // immediate, convert the instruction to X-form + // if the instruction is not an AI (which takes a s10 immediate), assume + // it is a load/store that can take a s14 immediate + if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset)) + || !isInt<14>(Offset)) { + int newOpcode = convertDFormToXForm(MI.getOpcode()); + unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj); + BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg ) + .addImm(Offset); + BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg()) + .addReg(tmpReg, RegState::Kill) + .addReg(SPU::R1); + // remove the replaced D-form instruction + MBB.erase(II); + } else { + MO.ChangeToImmediate(Offset); + } +} + +unsigned +SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const +{ + return SPU::R1; +} + +int +SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const +{ + switch(dFormOpcode) + { + case SPU::AIr32: return SPU::Ar32; + case SPU::LQDr32: return SPU::LQXr32; + case SPU::LQDr128: return SPU::LQXr128; + case SPU::LQDv16i8: return SPU::LQXv16i8; + case SPU::LQDv4i32: return SPU::LQXv4i32; + case SPU::LQDv4f32: return SPU::LQXv4f32; + case SPU::STQDr32: return SPU::STQXr32; + case SPU::STQDr128: return SPU::STQXr128; + case SPU::STQDv16i8: return SPU::STQXv16i8; + case SPU::STQDv4i32: return SPU::STQXv4i32; + case SPU::STQDv4f32: return SPU::STQXv4f32; + + default: assert( false && "Unhandled D to X-form conversion"); + } + // default will assert, but need to return something to keep the + // compiler happy. + return dFormOpcode; +} + +// TODO this is already copied from PPC. Could this convenience function +// be moved to the RegScavenger class? +unsigned +SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II, + RegScavenger *RS, + const TargetRegisterClass *RC, + int SPAdj) const +{ + assert(RS && "Register scavenging must be on"); + unsigned Reg = RS->FindUnusedReg(RC); + if (Reg == 0) + Reg = RS->scavengeRegister(RC, II, SPAdj); + assert( Reg && "Register scavenger failed"); + return Reg; +} diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h new file mode 100644 index 000000000000..e5ab22422502 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.h @@ -0,0 +1,101 @@ +//===-- SPURegisterInfo.h - Cell SPU Register Information Impl --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Cell SPU implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_REGISTERINFO_H +#define SPU_REGISTERINFO_H + +#include "SPU.h" + +#define GET_REGINFO_HEADER +#include "SPUGenRegisterInfo.inc" + +namespace llvm { + class SPUSubtarget; + class TargetInstrInfo; + class Type; + + class SPURegisterInfo : public SPUGenRegisterInfo { + private: + const SPUSubtarget &Subtarget; + const TargetInstrInfo &TII; + + //! 
Predicate: Does the machine function use the link register?
+    bool usesLR(MachineFunction &MF) const;
+
+  public:
+    SPURegisterInfo(const SPUSubtarget &subtarget, const TargetInstrInfo &tii);
+
+    //! Translate a register's enum value to a register number
+    /*!
+      This method translates a register's enum value to its register number,
+      e.g. SPU::R14 -> 14.
+    */
+    static unsigned getRegisterNumbering(unsigned RegEnum);
+
+    /// getPointerRegClass - Return the register class to use to hold pointers.
+    /// This is used for addressing modes.
+    virtual const TargetRegisterClass *
+    getPointerRegClass(unsigned Kind = 0) const;
+
+    /// After allocating this many registers, the allocator should feel
+    /// register pressure. The value is a somewhat random guess, based on the
+    /// number of non-callee-saved registers in the C calling convention.
+    virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC,
+                                          MachineFunction &MF) const{
+      return 50;
+    }
+
+    //! Return the array of callee-saved registers
+    virtual const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+
+    //! Allow for scavenging, so we can get scratch registers when needed.
+    virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+    { return true; }
+
+    //! Return the reserved registers
+    BitVector getReservedRegs(const MachineFunction &MF) const;
+
+    //! Eliminate the call frame setup pseudo-instructions
+    void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                       MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I) const;
+    //! Convert frame indices into machine operands
+    void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                             RegScavenger *RS = NULL) const;
+
+    //! Get the stack frame register (SP, aka R1)
+    unsigned getFrameRegister(const MachineFunction &MF) const;
+
+    //------------------------------------------------------------------------
+    // New methods added:
+    //------------------------------------------------------------------------
+
+    //! Convert D-form load/store to X-form load/store
+    /*!
+      Converts a register displacement load/store into a register-indexed
+      load/store for large stack frames, when the stack frame exceeds the
+      range of an s10 displacement.
+    */
+    int convertDFormToXForm(int dFormOpcode) const;
+
+    //! Acquire an unused register in an emergency.
+    unsigned findScratchRegister(MachineBasicBlock::iterator II,
+                                 RegScavenger *RS,
+                                 const TargetRegisterClass *RC,
+                                 int SPAdj) const;
+
+  };
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
new file mode 100644
index 000000000000..f27b042edd63
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterInfo.td
@@ -0,0 +1,183 @@
+//===-- SPURegisterInfo.td - The Cell SPU Register File ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +class SPUReg<string n> : Register<n> { + let Namespace = "SPU"; +} + +// The SPU's register are all 128-bits wide, which makes specifying the +// registers relatively easy, if relatively mundane: + +class SPUVecReg<bits<7> num, string n> : SPUReg<n> { + field bits<7> Num = num; +} + +def R0 : SPUVecReg<0, "$lr">, DwarfRegNum<[0]>; +def R1 : SPUVecReg<1, "$sp">, DwarfRegNum<[1]>; +def R2 : SPUVecReg<2, "$2">, DwarfRegNum<[2]>; +def R3 : SPUVecReg<3, "$3">, DwarfRegNum<[3]>; +def R4 : SPUVecReg<4, "$4">, DwarfRegNum<[4]>; +def R5 : SPUVecReg<5, "$5">, DwarfRegNum<[5]>; +def R6 : SPUVecReg<6, "$6">, DwarfRegNum<[6]>; +def R7 : SPUVecReg<7, "$7">, DwarfRegNum<[7]>; +def R8 : SPUVecReg<8, "$8">, DwarfRegNum<[8]>; +def R9 : SPUVecReg<9, "$9">, DwarfRegNum<[9]>; +def R10 : SPUVecReg<10, "$10">, DwarfRegNum<[10]>; +def R11 : SPUVecReg<11, "$11">, DwarfRegNum<[11]>; +def R12 : SPUVecReg<12, "$12">, DwarfRegNum<[12]>; +def R13 : SPUVecReg<13, "$13">, DwarfRegNum<[13]>; +def R14 : SPUVecReg<14, "$14">, DwarfRegNum<[14]>; +def R15 : SPUVecReg<15, "$15">, DwarfRegNum<[15]>; +def R16 : SPUVecReg<16, "$16">, DwarfRegNum<[16]>; +def R17 : SPUVecReg<17, "$17">, DwarfRegNum<[17]>; +def R18 : SPUVecReg<18, "$18">, DwarfRegNum<[18]>; +def R19 : SPUVecReg<19, "$19">, DwarfRegNum<[19]>; +def R20 : SPUVecReg<20, "$20">, DwarfRegNum<[20]>; +def R21 : SPUVecReg<21, "$21">, DwarfRegNum<[21]>; +def R22 : SPUVecReg<22, "$22">, DwarfRegNum<[22]>; +def R23 : SPUVecReg<23, "$23">, DwarfRegNum<[23]>; +def R24 : SPUVecReg<24, "$24">, DwarfRegNum<[24]>; +def R25 : SPUVecReg<25, "$25">, DwarfRegNum<[25]>; +def R26 : SPUVecReg<26, "$26">, DwarfRegNum<[26]>; +def R27 : SPUVecReg<27, "$27">, DwarfRegNum<[27]>; +def R28 : SPUVecReg<28, "$28">, DwarfRegNum<[28]>; +def R29 : SPUVecReg<29, "$29">, DwarfRegNum<[29]>; +def R30 : SPUVecReg<30, "$30">, DwarfRegNum<[30]>; +def R31 : SPUVecReg<31, "$31">, DwarfRegNum<[31]>; +def R32 : SPUVecReg<32, "$32">, DwarfRegNum<[32]>; +def R33 : SPUVecReg<33, "$33">, DwarfRegNum<[33]>; +def R34 : SPUVecReg<34, "$34">, DwarfRegNum<[34]>; +def R35 : SPUVecReg<35, "$35">, DwarfRegNum<[35]>; +def R36 : SPUVecReg<36, "$36">, DwarfRegNum<[36]>; +def R37 : SPUVecReg<37, "$37">, DwarfRegNum<[37]>; +def R38 : SPUVecReg<38, "$38">, DwarfRegNum<[38]>; +def R39 : SPUVecReg<39, "$39">, DwarfRegNum<[39]>; +def R40 : SPUVecReg<40, "$40">, DwarfRegNum<[40]>; +def R41 : SPUVecReg<41, "$41">, DwarfRegNum<[41]>; +def R42 : SPUVecReg<42, "$42">, DwarfRegNum<[42]>; +def R43 : SPUVecReg<43, "$43">, DwarfRegNum<[43]>; +def R44 : SPUVecReg<44, "$44">, DwarfRegNum<[44]>; +def R45 : SPUVecReg<45, "$45">, DwarfRegNum<[45]>; +def R46 : SPUVecReg<46, "$46">, DwarfRegNum<[46]>; +def R47 : SPUVecReg<47, "$47">, DwarfRegNum<[47]>; +def R48 : SPUVecReg<48, "$48">, DwarfRegNum<[48]>; +def R49 : SPUVecReg<49, "$49">, DwarfRegNum<[49]>; +def R50 : SPUVecReg<50, "$50">, DwarfRegNum<[50]>; +def R51 : SPUVecReg<51, "$51">, DwarfRegNum<[51]>; +def R52 : SPUVecReg<52, "$52">, DwarfRegNum<[52]>; +def R53 : SPUVecReg<53, "$53">, DwarfRegNum<[53]>; +def R54 : SPUVecReg<54, "$54">, DwarfRegNum<[54]>; +def R55 : SPUVecReg<55, "$55">, DwarfRegNum<[55]>; +def R56 : SPUVecReg<56, "$56">, DwarfRegNum<[56]>; +def R57 : SPUVecReg<57, "$57">, DwarfRegNum<[57]>; +def R58 : SPUVecReg<58, "$58">, DwarfRegNum<[58]>; +def R59 : SPUVecReg<59, "$59">, DwarfRegNum<[59]>; +def R60 
: SPUVecReg<60, "$60">, DwarfRegNum<[60]>; +def R61 : SPUVecReg<61, "$61">, DwarfRegNum<[61]>; +def R62 : SPUVecReg<62, "$62">, DwarfRegNum<[62]>; +def R63 : SPUVecReg<63, "$63">, DwarfRegNum<[63]>; +def R64 : SPUVecReg<64, "$64">, DwarfRegNum<[64]>; +def R65 : SPUVecReg<65, "$65">, DwarfRegNum<[65]>; +def R66 : SPUVecReg<66, "$66">, DwarfRegNum<[66]>; +def R67 : SPUVecReg<67, "$67">, DwarfRegNum<[67]>; +def R68 : SPUVecReg<68, "$68">, DwarfRegNum<[68]>; +def R69 : SPUVecReg<69, "$69">, DwarfRegNum<[69]>; +def R70 : SPUVecReg<70, "$70">, DwarfRegNum<[70]>; +def R71 : SPUVecReg<71, "$71">, DwarfRegNum<[71]>; +def R72 : SPUVecReg<72, "$72">, DwarfRegNum<[72]>; +def R73 : SPUVecReg<73, "$73">, DwarfRegNum<[73]>; +def R74 : SPUVecReg<74, "$74">, DwarfRegNum<[74]>; +def R75 : SPUVecReg<75, "$75">, DwarfRegNum<[75]>; +def R76 : SPUVecReg<76, "$76">, DwarfRegNum<[76]>; +def R77 : SPUVecReg<77, "$77">, DwarfRegNum<[77]>; +def R78 : SPUVecReg<78, "$78">, DwarfRegNum<[78]>; +def R79 : SPUVecReg<79, "$79">, DwarfRegNum<[79]>; +def R80 : SPUVecReg<80, "$80">, DwarfRegNum<[80]>; +def R81 : SPUVecReg<81, "$81">, DwarfRegNum<[81]>; +def R82 : SPUVecReg<82, "$82">, DwarfRegNum<[82]>; +def R83 : SPUVecReg<83, "$83">, DwarfRegNum<[83]>; +def R84 : SPUVecReg<84, "$84">, DwarfRegNum<[84]>; +def R85 : SPUVecReg<85, "$85">, DwarfRegNum<[85]>; +def R86 : SPUVecReg<86, "$86">, DwarfRegNum<[86]>; +def R87 : SPUVecReg<87, "$87">, DwarfRegNum<[87]>; +def R88 : SPUVecReg<88, "$88">, DwarfRegNum<[88]>; +def R89 : SPUVecReg<89, "$89">, DwarfRegNum<[89]>; +def R90 : SPUVecReg<90, "$90">, DwarfRegNum<[90]>; +def R91 : SPUVecReg<91, "$91">, DwarfRegNum<[91]>; +def R92 : SPUVecReg<92, "$92">, DwarfRegNum<[92]>; +def R93 : SPUVecReg<93, "$93">, DwarfRegNum<[93]>; +def R94 : SPUVecReg<94, "$94">, DwarfRegNum<[94]>; +def R95 : SPUVecReg<95, "$95">, DwarfRegNum<[95]>; +def R96 : SPUVecReg<96, "$96">, DwarfRegNum<[96]>; +def R97 : SPUVecReg<97, "$97">, DwarfRegNum<[97]>; +def R98 : SPUVecReg<98, "$98">, DwarfRegNum<[98]>; +def R99 : SPUVecReg<99, "$99">, DwarfRegNum<[99]>; +def R100 : SPUVecReg<100, "$100">, DwarfRegNum<[100]>; +def R101 : SPUVecReg<101, "$101">, DwarfRegNum<[101]>; +def R102 : SPUVecReg<102, "$102">, DwarfRegNum<[102]>; +def R103 : SPUVecReg<103, "$103">, DwarfRegNum<[103]>; +def R104 : SPUVecReg<104, "$104">, DwarfRegNum<[104]>; +def R105 : SPUVecReg<105, "$105">, DwarfRegNum<[105]>; +def R106 : SPUVecReg<106, "$106">, DwarfRegNum<[106]>; +def R107 : SPUVecReg<107, "$107">, DwarfRegNum<[107]>; +def R108 : SPUVecReg<108, "$108">, DwarfRegNum<[108]>; +def R109 : SPUVecReg<109, "$109">, DwarfRegNum<[109]>; +def R110 : SPUVecReg<110, "$110">, DwarfRegNum<[110]>; +def R111 : SPUVecReg<111, "$111">, DwarfRegNum<[111]>; +def R112 : SPUVecReg<112, "$112">, DwarfRegNum<[112]>; +def R113 : SPUVecReg<113, "$113">, DwarfRegNum<[113]>; +def R114 : SPUVecReg<114, "$114">, DwarfRegNum<[114]>; +def R115 : SPUVecReg<115, "$115">, DwarfRegNum<[115]>; +def R116 : SPUVecReg<116, "$116">, DwarfRegNum<[116]>; +def R117 : SPUVecReg<117, "$117">, DwarfRegNum<[117]>; +def R118 : SPUVecReg<118, "$118">, DwarfRegNum<[118]>; +def R119 : SPUVecReg<119, "$119">, DwarfRegNum<[119]>; +def R120 : SPUVecReg<120, "$120">, DwarfRegNum<[120]>; +def R121 : SPUVecReg<121, "$121">, DwarfRegNum<[121]>; +def R122 : SPUVecReg<122, "$122">, DwarfRegNum<[122]>; +def R123 : SPUVecReg<123, "$123">, DwarfRegNum<[123]>; +def R124 : SPUVecReg<124, "$124">, DwarfRegNum<[124]>; +def R125 : SPUVecReg<125, "$125">, DwarfRegNum<[125]>; +def R126 : SPUVecReg<126, 
"$126">, DwarfRegNum<[126]>; +def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>; + +/* Need floating point status register here: */ +/* def FPCSR : ... */ + +// The SPU's registers as 128-bit wide entities, and can function as general +// purpose registers, where the operands are in the "preferred slot": +// The non-volatile registers are allocated in reverse order, like PPC does it. +def GPRC : RegisterClass<"SPU", [i128], 128, + (add (sequence "R%u", 0, 79), + (sequence "R%u", 127, 80))>; + +// The SPU's registers as 64-bit wide (double word integer) "preferred slot": +def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>; + +// The SPU's registers as 64-bit wide (double word) FP "preferred slot": +def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>; + +// The SPU's registers as 32-bit wide (word) "preferred slot": +def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>; + +// The SPU's registers as single precision floating point "preferred slot": +def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>; + +// The SPU's registers as 16-bit wide (halfword) "preferred slot": +def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>; + +// The SPU's registers as 8-bit wide (byte) "preferred slot": +def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>; + +// The SPU's registers as vector registers: +def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128, + (add GPRC)>; diff --git a/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h new file mode 100644 index 000000000000..e557ed340a28 --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPURegisterNames.h @@ -0,0 +1,19 @@ +//===- SPURegisterNames.h - Wrapper header for SPU register names -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef SPU_REGISTER_NAMES_H +#define SPU_REGISTER_NAMES_H + +// Define symbolic names for Cell registers. This defines a mapping from +// register name to register number. +// +#define GET_REGINFO_ENUM +#include "SPUGenRegisterInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td new file mode 100644 index 000000000000..9ccd0844e48e --- /dev/null +++ b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td @@ -0,0 +1,59 @@ +//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
new file mode 100644
index 000000000000..9ccd0844e48e
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSchedule.td
@@ -0,0 +1,59 @@
+//===-- SPUSchedule.td - Cell Scheduling Definitions -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units (even and odd execution pipelines):
+
+def EVEN_UNIT : FuncUnit;  // Even execution unit: (PC & 0x7 == 000)
+def ODD_UNIT  : FuncUnit;  // Odd execution unit:  (PC & 0x7 == 100)
+
+//===----------------------------------------------------------------------===//
+// Instruction itinerary classes used for Cell SPU
+//===----------------------------------------------------------------------===//
+
+def LoadStore    : InstrItinClass;   // ODD_UNIT
+def BranchHints  : InstrItinClass;   // ODD_UNIT
+def BranchResolv : InstrItinClass;   // ODD_UNIT
+def ChanOpSPR    : InstrItinClass;   // ODD_UNIT
+def ShuffleOp    : InstrItinClass;   // ODD_UNIT
+def SelectOp     : InstrItinClass;   // ODD_UNIT
+def GatherOp     : InstrItinClass;   // ODD_UNIT
+def LoadNOP      : InstrItinClass;   // ODD_UNIT
+def ExecNOP      : InstrItinClass;   // EVEN_UNIT
+def SPrecFP      : InstrItinClass;   // EVEN_UNIT
+def DPrecFP      : InstrItinClass;   // EVEN_UNIT
+def FPInt        : InstrItinClass;   // EVEN_UNIT (FP <-> integer)
+def ByteOp       : InstrItinClass;   // EVEN_UNIT
+def IntegerOp    : InstrItinClass;   // EVEN_UNIT
+def IntegerMulDiv: InstrItinClass;   // EVEN_UNIT
+def RotShiftVec  : InstrItinClass;   // EVEN_UNIT, rotate/shift within vector elements
+def RotShiftQuad : InstrItinClass;   // ODD_UNIT, rotate/shift of the entire quadword
+def ImmLoad      : InstrItinClass;   // EVEN_UNIT
+
+/* Note: The itinerary for the Cell SPU is somewhat contrived... */
+def SPUItineraries : ProcessorItineraries<[ODD_UNIT, EVEN_UNIT], [], [
+  InstrItinData<LoadStore   , [InstrStage<6,  [ODD_UNIT]>]>,
+  InstrItinData<BranchHints , [InstrStage<6,  [ODD_UNIT]>]>,
+  InstrItinData<BranchResolv, [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<ChanOpSPR   , [InstrStage<6,  [ODD_UNIT]>]>,
+  InstrItinData<ShuffleOp   , [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<SelectOp    , [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<GatherOp    , [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<LoadNOP     , [InstrStage<1,  [ODD_UNIT]>]>,
+  InstrItinData<ExecNOP     , [InstrStage<1,  [EVEN_UNIT]>]>,
+  InstrItinData<SPrecFP     , [InstrStage<6,  [EVEN_UNIT]>]>,
+  InstrItinData<DPrecFP     , [InstrStage<13, [EVEN_UNIT]>]>,
+  InstrItinData<FPInt       , [InstrStage<2,  [EVEN_UNIT]>]>,
+  InstrItinData<ByteOp      , [InstrStage<4,  [EVEN_UNIT]>]>,
+  InstrItinData<IntegerOp   , [InstrStage<2,  [EVEN_UNIT]>]>,
+  InstrItinData<RotShiftVec , [InstrStage<4,  [EVEN_UNIT]>]>,
+  InstrItinData<RotShiftQuad, [InstrStage<4,  [ODD_UNIT]>]>,
+  InstrItinData<IntegerMulDiv,[InstrStage<7,  [EVEN_UNIT]>]>,
+  InstrItinData<ImmLoad     , [InstrStage<2,  [EVEN_UNIT]>]>
+  ]>;
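The itinerary above only says which pipeline each instruction class wants and how many cycles it occupies; whether two instructions actually dual-issue also depends on their addresses, as the "(PC & 0x7)" comments indicate. The standalone C++ sketch below (illustrative only, not LLVM code) shows just that address mapping: SPU instructions are 4 bytes wide, so within an 8-byte fetch pair the instruction at offset 0 can go to the even pipe and the one at offset 4 to the odd pipe.

// Map an instruction address to the SPU issue slot suggested by its
// low address bits: (PC & 0x7) == 0b000 -> even pipe, 0b100 -> odd pipe.
#include <cstdint>
#include <cstdio>

const char *pipeFor(uint32_t PC) {
  return (PC & 0x7) == 0 ? "even" : "odd";
}

int main() {
  for (uint32_t PC = 0x120; PC < 0x130; PC += 4)   // 4-byte instructions
    std::printf("insn at 0x%x issues on the %s pipe\n", PC, pipeFor(PC));
  return 0;
}

This address constraint is why the backend later inserts nops/lnops (see the SPUNopFiller pass added in SPUTargetMachine.cpp below) to line even-pipe and odd-pipe instructions up for dual issue.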
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
new file mode 100644
index 000000000000..5732fd43cdc2
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- SPUSelectionDAGInfo.cpp - CellSPU SelectionDAG Info ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SPUSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cellspu-selectiondag-info"
+#include "SPUTargetMachine.h"
+using namespace llvm;
+
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+  : TargetSelectionDAGInfo(TM) {
+}
+
+SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
new file mode 100644
index 000000000000..39257d92c400
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -0,0 +1,31 @@
+//===-- SPUSelectionDAGInfo.h - CellSPU SelectionDAG Info -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CellSPU subclass of TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSPUSELECTIONDAGINFO_H
+#define CELLSPUSELECTIONDAGINFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+
+class SPUTargetMachine;
+
+class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+  explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
+  ~SPUSelectionDAGInfo();
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
new file mode 100644
index 000000000000..eec2d250be7f
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -0,0 +1,65 @@
+//===-- SPUSubtarget.cpp - STI Cell SPU Subtarget Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUSubtarget.h"
+#include "SPU.h"
+#include "SPURegisterInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPUGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
+                           const std::string &FS) :
+  SPUGenSubtargetInfo(TT, CPU, FS),
+  StackAlignment(16),
+  ProcDirective(SPU::DEFAULT_PROC),
+  UseLargeMem(false)
+{
+  // Should be the target SPU processor type. For now, since there's only
+  // one, simply default to the current "v0" default:
+  std::string default_cpu("v0");
+
+  // Parse the features string.
+  ParseSubtargetFeatures(default_cpu, FS);
+
+  // Initialize the scheduling itinerary for the specified CPU.
+  InstrItins = getInstrItineraryForCPU(default_cpu);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void SPUSubtarget::SetJITMode() {
+}
+
+/// Enable PostRA scheduling for optimization levels -O2 and -O3.
+bool SPUSubtarget::enablePostRAScheduler(
+                       CodeGenOpt::Level OptLevel,
+                       TargetSubtargetInfo::AntiDepBreakMode &Mode,
+                       RegClassVector &CriticalPathRCs) const {
+  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  // CriticalPathRCs is the set of register classes for which anti-dependency
+  // breaking is performed; do it for all register classes.
+  CriticalPathRCs.clear();
+  CriticalPathRCs.push_back(&SPU::R8CRegClass);
+  CriticalPathRCs.push_back(&SPU::R16CRegClass);
+  CriticalPathRCs.push_back(&SPU::R32CRegClass);
+  CriticalPathRCs.push_back(&SPU::R32FPRegClass);
+  CriticalPathRCs.push_back(&SPU::R64CRegClass);
+  CriticalPathRCs.push_back(&SPU::VECREGRegClass);
+  return OptLevel >= CodeGenOpt::Default;
+}
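enablePostRAScheduler above asks the generic post-RA scheduler to break "critical" anti-dependencies for every SPU register class. The standalone C++ sketch below (conceptual only, not LLVM code; the instruction struct and register numbers are invented for illustration) shows the kind of hazard that breaking removes: a write-after-read on $3 that, once the second definition is renamed to a free register, no longer forces the two instructions to stay in order.

// A write-after-read (anti) dependency and its removal by renaming.
#include <cstdio>
#include <string>

struct Inst {
  std::string Text;
  int Def;   // register defined (-1 if none)
  int Use;   // one register read (-1 if none)
};

int main() {
  Inst A = {"a    $5, $3, $4", 5, 3};   // reads $3
  Inst B = {"il   $3, 10",     3, -1};  // redefines $3: WAR hazard with A
  if (B.Def == A.Use) {                 // break the anti-dependency
    B.Def  = 79;                        // any register not live here
    B.Text = "il   $79, 10";
  }
  std::printf("%s\n%s\n", A.Text.c_str(), B.Text.c_str());
  return 0;
}

With the hazard gone, the scheduler is free to reorder or pair the two instructions, which matters on an in-order, dual-issue core like the SPU.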
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
new file mode 100644
index 000000000000..7c4aa1430217
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUSubtarget.h
@@ -0,0 +1,97 @@
+//===-- SPUSubtarget.h - Define Subtarget for the Cell SPU ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CELLSUBTARGET_H
+#define CELLSUBTARGET_H
+
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+#define GET_SUBTARGETINFO_HEADER
+#include "SPUGenSubtargetInfo.inc"
+
+namespace llvm {
+  class GlobalValue;
+  class StringRef;
+
+  namespace SPU {
+    enum {
+      PROC_NONE,
+      DEFAULT_PROC
+    };
+  }
+
+  class SPUSubtarget : public SPUGenSubtargetInfo {
+  protected:
+    /// StackAlignment - The minimum alignment known to hold of the stack frame
+    /// on entry to the function and which must be maintained by every function.
+    unsigned StackAlignment;
+
+    /// Selected instruction itineraries (one entry per itinerary class).
+    InstrItineraryData InstrItins;
+
+    /// Which SPU processor (this isn't really used, but it's there to keep
+    /// the C compiler happy).
+    unsigned ProcDirective;
+
+    /// Use (assume) large memory -- effectively disables the LQA/STQA
+    /// instructions, which assume a 256K local store.
+    bool UseLargeMem;
+
+  public:
+    /// This constructor initializes the data members to match that
+    /// of the specified triple.
+    ///
+    SPUSubtarget(const std::string &TT, const std::string &CPU,
+                 const std::string &FS);
+
+    /// ParseSubtargetFeatures - Parses the features string, setting the
+    /// specified subtarget options. The definition of this function is
+    /// auto-generated by tblgen.
+    void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+    /// SetJITMode - This is called to inform the subtarget info that we are
+    /// producing code for the JIT.
+    void SetJITMode();
+
+    /// getStackAlignment - Returns the minimum alignment known to hold of the
+    /// stack frame on entry to the function and which must be maintained by
+    /// every function for this subtarget.
+    unsigned getStackAlignment() const { return StackAlignment; }
+
+    /// getInstrItineraryData - Return the instruction itineraries based on
+    /// subtarget selection.
+    const InstrItineraryData &getInstrItineraryData() const {
+      return InstrItins;
+    }
+
+    /// usingLargeMem - Large memory addressing predicate.
+    bool usingLargeMem() const {
+      return UseLargeMem;
+    }
+
+    /// getTargetDataString - Return the pointer size and type alignment
+    /// properties of this subtarget.
+    const char *getTargetDataString() const {
+      return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
+             "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:64:128-v128:128:128"
+             "-s:128:128-n32:64";
+    }
+
+    bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+                               TargetSubtargetInfo::AntiDepBreakMode &Mode,
+                               RegClassVector &CriticalPathRCs) const;
+  };
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
new file mode 100644
index 000000000000..21f6b25bf256
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -0,0 +1,93 @@
+//===-- SPUTargetMachine.cpp - Define TargetMachine for Cell SPU ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Top-level implementation for the Cell SPU target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUTargetMachine.h"
+#include "SPU.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+extern "C" void LLVMInitializeCellSPUTarget() {
+  // Register the target.
+  RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
+}
+
+const std::pair<unsigned, int> *
+SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
+  NumEntries = 1;
+  return &LR[0];
+}
+
+SPUTargetMachine::SPUTargetMachine(const Target &T, StringRef TT,
+                                   StringRef CPU, StringRef FS,
+                                   const TargetOptions &Options,
+                                   Reloc::Model RM, CodeModel::Model CM,
+                                   CodeGenOpt::Level OL)
+  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+    Subtarget(TT, CPU, FS),
+    DataLayout(Subtarget.getTargetDataString()),
+    InstrInfo(*this),
+    FrameLowering(Subtarget),
+    TLInfo(*this),
+    TSInfo(*this),
+    InstrItins(Subtarget.getInstrItineraryData()) {
+}
+
+//===----------------------------------------------------------------------===//
+// Pass Pipeline Configuration
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// SPU Code Generator Pass Configuration Options.
+class SPUPassConfig : public TargetPassConfig {
+public:
+  SPUPassConfig(SPUTargetMachine *TM, PassManagerBase &PM)
+    : TargetPassConfig(TM, PM) {}
+
+  SPUTargetMachine &getSPUTargetMachine() const {
+    return getTM<SPUTargetMachine>();
+  }
+
+  virtual bool addInstSelector();
+  virtual bool addPreEmitPass();
+};
+} // namespace
+
+TargetPassConfig *SPUTargetMachine::createPassConfig(PassManagerBase &PM) {
+  return new SPUPassConfig(this, PM);
+}
+
+bool SPUPassConfig::addInstSelector() {
+  // Install an instruction selector.
+  PM.add(createSPUISelDag(getSPUTargetMachine()));
+  return false;
+}
+
+// Passes to run just before printing the assembly.
+bool SPUPassConfig::addPreEmitPass() {
+  // Load the TCE instruction scheduler, if it is available via loaded plugins.
+  typedef llvm::FunctionPass* (*BuilderFunc)(const char*);
+  BuilderFunc schedulerCreator =
+    (BuilderFunc)(intptr_t)sys::DynamicLibrary::SearchForAddressOfSymbol(
+        "createTCESchedulerPass");
+  if (schedulerCreator != NULL)
+    PM.add(schedulerCreator("cellspu"));
+
+  // Align instructions with nops/lnops for dual issue.
+  PM.add(createSPUNopFillerPass(getSPUTargetMachine()));
+  return true;
+}
diff --git a/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
new file mode 100644
index 000000000000..3e5d38c919c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/SPUTargetMachine.h
@@ -0,0 +1,87 @@
+//===-- SPUTargetMachine.h - Define TargetMachine for Cell SPU --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CellSPU-specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPU_TARGETMACHINE_H
+#define SPU_TARGETMACHINE_H
+
+#include "SPUSubtarget.h"
+#include "SPUInstrInfo.h"
+#include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
+#include "SPUFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+
+namespace llvm {
+
+/// SPUTargetMachine
+///
+class SPUTargetMachine : public LLVMTargetMachine {
+  SPUSubtarget        Subtarget;
+  const TargetData    DataLayout;
+  SPUInstrInfo        InstrInfo;
+  SPUFrameLowering    FrameLowering;
+  SPUTargetLowering   TLInfo;
+  SPUSelectionDAGInfo TSInfo;
+  InstrItineraryData  InstrItins;
+public:
+  SPUTargetMachine(const Target &T, StringRef TT,
+                   StringRef CPU, StringRef FS, const TargetOptions &Options,
+                   Reloc::Model RM, CodeModel::Model CM,
+                   CodeGenOpt::Level OL);
+
+  /// Return the subtarget implementation object.
+  virtual const SPUSubtarget *getSubtargetImpl() const {
+    return &Subtarget;
+  }
+  virtual const SPUInstrInfo *getInstrInfo() const {
+    return &InstrInfo;
+  }
+  virtual const SPUFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
+  /*!
+    \note Cell SPU does not support JIT today. It could support JIT at some
+    point.
+  */
+  virtual TargetJITInfo *getJITInfo() {
+    return NULL;
+  }
+
+  virtual const SPUTargetLowering *getTargetLowering() const {
+    return &TLInfo;
+  }
+
+  virtual const SPUSelectionDAGInfo *getSelectionDAGInfo() const {
+    return &TSInfo;
+  }
+
+  virtual const SPURegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  virtual const TargetData *getTargetData() const {
+    return &DataLayout;
+  }
+
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
+  }
+
+  // Pass Pipeline Configuration
+  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
new file mode 100644
index 000000000000..84aadfad6f8d
--- /dev/null
+++ b/contrib/llvm/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp
@@ -0,0 +1,20 @@
+//===-- CellSPUTargetInfo.cpp - CellSPU Target Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPU.h"
+#include "llvm/Module.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheCellSPUTarget;
+
+extern "C" void LLVMInitializeCellSPUTargetInfo() {
+  RegisterTarget<Triple::cellspu>
+    X(TheCellSPUTarget, "cellspu", "STI CBEA Cell SPU [experimental]");
+}
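LLVMInitializeCellSPUTargetInfo() above is what makes the "cellspu" target discoverable through the TargetRegistry. The standalone C++ sketch below shows how a client of an LLVM tree of this vintage could look the target up; it assumes the program is linked against the CellSPU TargetInfo library and that the TargetRegistry header lives in llvm/Support, as in the includes above. The triple string used here is only an example.

// Look up the target registered by LLVMInitializeCellSPUTargetInfo().
#include "llvm/Support/TargetRegistry.h"
#include <cstdio>
#include <string>

extern "C" void LLVMInitializeCellSPUTargetInfo();

int main() {
  LLVMInitializeCellSPUTargetInfo();

  std::string Error;
  const llvm::Target *T =
      llvm::TargetRegistry::lookupTarget("cellspu-unknown-elf", Error);
  if (!T) {
    std::fprintf(stderr, "lookup failed: %s\n", Error.c_str());
    return 1;
  }
  std::printf("found '%s': %s\n", T->getName(), T->getShortDescription());
  return 0;
}

A successful lookup returns the same llvm::Target object that RegisterTargetMachine binds SPUTargetMachine to in SPUTargetMachine.cpp, so tools such as llc can then construct the full code generator from it.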