aboutsummaryrefslogtreecommitdiff
path: root/lib/Target/Hexagon/HexagonPatterns.td
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Target/Hexagon/HexagonPatterns.td')
-rw-r--r--lib/Target/Hexagon/HexagonPatterns.td194
1 files changed, 134 insertions, 60 deletions
diff --git a/lib/Target/Hexagon/HexagonPatterns.td b/lib/Target/Hexagon/HexagonPatterns.td
index fb731f56bfbf..485e658e1c84 100644
--- a/lib/Target/Hexagon/HexagonPatterns.td
+++ b/lib/Target/Hexagon/HexagonPatterns.td
@@ -99,13 +99,21 @@ def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
+// Type profile for a node that produces one vector result and takes no
+// operands.
+def SDTVecLeaf:
+ SDTypeProfile<1, 0, [SDTCisVec<0>]>;
def SDTVecVecIntOp:
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<1,2>,
SDTCisVT<3,i32>]>;
+// Operand-less, vector-valued target nodes (both use the SDTVecLeaf profile).
+def HexagonPTRUE: SDNode<"HexagonISD::PTRUE", SDTVecLeaf>;
+def HexagonPFALSE: SDNode<"HexagonISD::PFALSE", SDTVecLeaf>;
def HexagonVALIGN: SDNode<"HexagonISD::VALIGN", SDTVecVecIntOp>;
def HexagonVALIGNADDR: SDNode<"HexagonISD::VALIGNADDR", SDTIntUnaryOp>;
+// Fragments wrapping the PTRUE/PFALSE nodes. pnot matches the negation of
+// $Pu when it is written as an xor of $Pu with ptrue.
+def ptrue: PatFrag<(ops), (HexagonPTRUE)>;
+def pfalse: PatFrag<(ops), (HexagonPFALSE)>;
+def pnot: PatFrag<(ops node:$Pu), (xor node:$Pu, ptrue)>;
+
def valign: PatFrag<(ops node:$Vt, node:$Vs, node:$Ru),
(HexagonVALIGN node:$Vt, node:$Vs, node:$Ru)>;
def valignaddr: PatFrag<(ops node:$Addr), (HexagonVALIGNADDR node:$Addr)>;
@@ -154,6 +162,11 @@ def IsNPow2_64H: PatLeaf<(i64 imm), [{
return isPowerOf2_64(NV) && Log2_64(NV) >= 32;
}]>;
+// Matches an i32 immediate whose zero-extended value fits in Width bits
+// and is less than or equal to Arg (unsigned comparison).
+class IsULE<int Width, int Arg>: PatLeaf<(i32 imm),
+ "uint64_t V = N->getZExtValue();" #
+ "return isUInt<" # Width # ">(V) && V <= " # Arg # ";"
+>;
+
class IsUGT<int Width, int Arg>: PatLeaf<(i32 imm),
"uint64_t V = N->getZExtValue();" #
"return isUInt<" # Width # ">(V) && V > " # Arg # ";"
@@ -320,6 +333,24 @@ multiclass SelMinMax_pats<PatFrag CmpOp, PatFrag Val,
(InstB Val:$A, Val:$B)>;
}
+// Patterns that fold a select of the two compared values into a single
+// instruction:
+//   Sel(CmpOp(Vs, Vt), Vt, Vs) -> PickT Vs, Vt
+//   Sel(CmpOp(Vs, Vt), Vs, Vt) -> PickS Vs, Vt
+// That is, PickT is used when the select yields the second compare operand
+// on a true condition, and PickS when it yields the first.
+// CmpType is the result type of the comparison; CmpPred is the operand
+// fragment applied to both compare operands and both select arms.
+multiclass MinMax_pats<InstHexagon PickT, InstHexagon PickS,
+ PatFrag Sel, PatFrag CmpOp,
+ ValueType CmpType, PatFrag CmpPred> {
+ def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)),
+ CmpPred:$Vt, CmpPred:$Vs),
+ (PickT CmpPred:$Vs, CmpPred:$Vt)>;
+ def: Pat<(Sel (CmpType (CmpOp CmpPred:$Vs, CmpPred:$Vt)),
+ CmpPred:$Vs, CmpPred:$Vt),
+ (PickS CmpPred:$Vs, CmpPred:$Vt)>;
+}
+
+// Bitcasts between same-size vector types are no-ops, except for the
+// actual type change.
+// Emits both directions (Ty2 -> Ty1 and Ty1 -> Ty2) for a value held in
+// register class RC; the output reuses the same register, retyped.
+multiclass NopCast_pat<ValueType Ty1, ValueType Ty2, RegisterClass RC> {
+ def: Pat<(Ty1 (bitconvert (Ty2 RC:$Val))), (Ty1 RC:$Val)>;
+ def: Pat<(Ty2 (bitconvert (Ty1 RC:$Val))), (Ty2 RC:$Val)>;
+}
+
// Frags for commonly used SDNodes.
def Add: pf2<add>; def And: pf2<and>; def Sra: pf2<sra>;
@@ -403,17 +434,18 @@ def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
def: Pat<(i64 (bitconvert F64:$v)), (I64:$v)>;
def: Pat<(f64 (bitconvert I64:$v)), (F64:$v)>;
-multiclass Cast_pat<ValueType Ta, ValueType Tb, RegisterClass RC> {
- def: Pat<(Tb (bitconvert (Ta RC:$Rs))), (Tb RC:$Rs)>;
- def: Pat<(Ta (bitconvert (Tb RC:$Rs))), (Ta RC:$Rs)>;
-}
-
-// Bit convert vector types to integers.
-defm: Cast_pat<v4i8, i32, IntRegs>;
-defm: Cast_pat<v2i16, i32, IntRegs>;
-defm: Cast_pat<v8i8, i64, DoubleRegs>;
-defm: Cast_pat<v4i16, i64, DoubleRegs>;
-defm: Cast_pat<v2i32, i64, DoubleRegs>;
+// Bit convert 32- and 64-bit types.
+// All of these are bitcastable to one another: i32, v2i16, v4i8.
+defm: NopCast_pat<i32, v2i16, IntRegs>;
+defm: NopCast_pat<i32, v4i8, IntRegs>;
+defm: NopCast_pat<v2i16, v4i8, IntRegs>;
+// All of these are bitcastable to one another: i64, v2i32, v4i16, v8i8.
+defm: NopCast_pat<i64, v2i32, DoubleRegs>;
+defm: NopCast_pat<i64, v4i16, DoubleRegs>;
+defm: NopCast_pat<i64, v8i8, DoubleRegs>;
+defm: NopCast_pat<v2i32, v4i16, DoubleRegs>;
+defm: NopCast_pat<v2i32, v8i8, DoubleRegs>;
+defm: NopCast_pat<v4i16, v8i8, DoubleRegs>;
// --(3) Extend/truncate -------------------------------------------------
@@ -497,7 +529,9 @@ def: Pat<(v2i16 (trunc V2I32:$Rs)),
//
def: Pat<(not I1:$Ps), (C2_not I1:$Ps)>;
-def: Pat<(not V8I1:$Ps), (C2_not V8I1:$Ps)>;
+def: Pat<(pnot V2I1:$Ps), (C2_not V2I1:$Ps)>;
+def: Pat<(pnot V4I1:$Ps), (C2_not V4I1:$Ps)>;
+def: Pat<(pnot V8I1:$Ps), (C2_not V8I1:$Ps)>;
def: Pat<(add I1:$Ps, -1), (C2_not I1:$Ps)>;
multiclass BoolOpR_RR_pat<InstHexagon MI, PatFrag Op> {
@@ -816,14 +850,6 @@ def: Pat<(select (not I1:$Pu), f32ImmPred:$I, F32:$Rs),
def: Pat<(select (not I1:$Pu), F32:$Rt, f32ImmPred:$I),
(C2_muxri I1:$Pu, (ftoi $I), F32:$Rt)>;
-def: Pat<(select I1:$Pu, V4I8:$Rs, V4I8:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
-def: Pat<(select I1:$Pu, V2I16:$Rs, V2I16:$Rt),
- (LoReg (C2_vmux I1:$Pu, (ToAext64 $Rs), (ToAext64 $Rt)))>;
-def: Pat<(select I1:$Pu, V2I32:$Rs, V2I32:$Rt),
- (Combinew (C2_mux I1:$Pu, (HiReg $Rs), (HiReg $Rt)),
- (C2_mux I1:$Pu, (LoReg $Rs), (LoReg $Rt)))>;
-
def: Pat<(vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt),
(C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
@@ -831,6 +857,14 @@ def: Pat<(vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt),
def: Pat<(vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt),
(C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+// vselect on a negated predicate: use the original predicate and swap the
+// two value operands of C2_vmux instead of emitting a separate negation.
+def: Pat<(vselect (pnot V8I1:$Pu), V8I8:$Rs, V8I8:$Rt),
+ (C2_vmux V8I1:$Pu, V8I8:$Rt, V8I8:$Rs)>;
+def: Pat<(vselect (pnot V4I1:$Pu), V4I16:$Rs, V4I16:$Rt),
+ (C2_vmux V4I1:$Pu, V4I16:$Rt, V4I16:$Rs)>;
+def: Pat<(vselect (pnot V2I1:$Pu), V2I32:$Rs, V2I32:$Rt),
+ (C2_vmux V2I1:$Pu, V2I32:$Rt, V2I32:$Rs)>;
+
+
// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw).
def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw),
(C2_or (C2_and I1:$Pu, I1:$Pv),
@@ -863,32 +897,44 @@ let AddedComplexity = 200 in {
}
let AddedComplexity = 200 in {
- defm: SelMinMax_pats<setge, I32, A2_max, A2_min>;
- defm: SelMinMax_pats<setgt, I32, A2_max, A2_min>;
- defm: SelMinMax_pats<setle, I32, A2_min, A2_max>;
- defm: SelMinMax_pats<setlt, I32, A2_min, A2_max>;
- defm: SelMinMax_pats<setuge, I32, A2_maxu, A2_minu>;
- defm: SelMinMax_pats<setugt, I32, A2_maxu, A2_minu>;
- defm: SelMinMax_pats<setule, I32, A2_minu, A2_maxu>;
- defm: SelMinMax_pats<setult, I32, A2_minu, A2_maxu>;
-
- defm: SelMinMax_pats<setge, I64, A2_maxp, A2_minp>;
- defm: SelMinMax_pats<setgt, I64, A2_maxp, A2_minp>;
- defm: SelMinMax_pats<setle, I64, A2_minp, A2_maxp>;
- defm: SelMinMax_pats<setlt, I64, A2_minp, A2_maxp>;
- defm: SelMinMax_pats<setuge, I64, A2_maxup, A2_minup>;
- defm: SelMinMax_pats<setugt, I64, A2_maxup, A2_minup>;
- defm: SelMinMax_pats<setule, I64, A2_minup, A2_maxup>;
- defm: SelMinMax_pats<setult, I64, A2_minup, A2_maxup>;
+ defm: MinMax_pats<A2_min, A2_max, select, setgt, i1, I32>;
+ defm: MinMax_pats<A2_min, A2_max, select, setge, i1, I32>;
+ defm: MinMax_pats<A2_max, A2_min, select, setlt, i1, I32>;
+ defm: MinMax_pats<A2_max, A2_min, select, setle, i1, I32>;
+ defm: MinMax_pats<A2_minu, A2_maxu, select, setugt, i1, I32>;
+ defm: MinMax_pats<A2_minu, A2_maxu, select, setuge, i1, I32>;
+ defm: MinMax_pats<A2_maxu, A2_minu, select, setult, i1, I32>;
+ defm: MinMax_pats<A2_maxu, A2_minu, select, setule, i1, I32>;
+
+ defm: MinMax_pats<A2_minp, A2_maxp, select, setgt, i1, I64>;
+ defm: MinMax_pats<A2_minp, A2_maxp, select, setge, i1, I64>;
+ defm: MinMax_pats<A2_maxp, A2_minp, select, setlt, i1, I64>;
+ defm: MinMax_pats<A2_maxp, A2_minp, select, setle, i1, I64>;
+ defm: MinMax_pats<A2_minup, A2_maxup, select, setugt, i1, I64>;
+ defm: MinMax_pats<A2_minup, A2_maxup, select, setuge, i1, I64>;
+ defm: MinMax_pats<A2_maxup, A2_minup, select, setult, i1, I64>;
+ defm: MinMax_pats<A2_maxup, A2_minup, select, setule, i1, I64>;
}
let AddedComplexity = 100 in {
- defm: SelMinMax_pats<setolt, F32, F2_sfmin, F2_sfmax>;
- defm: SelMinMax_pats<setole, F32, F2_sfmin, F2_sfmax>;
- defm: SelMinMax_pats<setogt, F32, F2_sfmax, F2_sfmin>;
- defm: SelMinMax_pats<setoge, F32, F2_sfmax, F2_sfmin>;
-}
-
+ defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setogt, i1, F32>;
+ defm: MinMax_pats<F2_sfmin, F2_sfmax, select, setoge, i1, F32>;
+ defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setolt, i1, F32>;
+ defm: MinMax_pats<F2_sfmax, F2_sfmin, select, setole, i1, F32>;
+}
+
+// Vector min/max for v8i8, v4i16 and v2i32: a vselect driven by a
+// greater-than / greater-or-equal compare (signed, then unsigned) of the
+// same two operands. No AddedComplexity is applied to these.
+defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setgt, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminb, A2_vmaxb, vselect, setge, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setgt, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminh, A2_vmaxh, vselect, setge, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setgt, v2i1, V2I32>;
+defm: MinMax_pats<A2_vminw, A2_vmaxw, vselect, setge, v2i1, V2I32>;
+defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setugt, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminub, A2_vmaxub, vselect, setuge, v8i1, V8I8>;
+defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setugt, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminuh, A2_vmaxuh, vselect, setuge, v4i1, V4I16>;
+defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setugt, v2i1, V2I32>;
+defm: MinMax_pats<A2_vminuw, A2_vmaxuw, vselect, setuge, v2i1, V2I32>;
// --(7) Insert/extract --------------------------------------------------
//
@@ -1639,19 +1685,19 @@ def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
//
// Count leading zeros.
-def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
// Count trailing zeros.
-def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>;
+def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
// Count leading ones.
-def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
// Count trailing ones.
-def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>;
+def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
// Define leading/trailing patterns that require zero-extensions to 64 bits.
@@ -1706,6 +1752,7 @@ let AddedComplexity = 20 in { // Complexity greater than and/or/xor
(i32 (LoReg $Rss)))>;
}
+
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
(S2_tstbit_i IntRegs:$Rs, imm:$u5)>;
@@ -1717,6 +1764,20 @@ let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
(S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
}
+// Extract a single bit as a 0/1 integer: (Rs >> n) & 1 becomes a tstbit
+// whose predicate result is converted back to an integer with I1toI32.
+// For 64-bit inputs the shift amount selects the half to test: amounts
+// up to 31 test the low register, larger amounts test the high register
+// with the amount adjusted by UDEC32, and the result is zero-extended to
+// 64 bits.
+// NOTE(review): UDEC32 presumably subtracts 32 from the immediate; confirm
+// against its definition.
+def: Pat<(and (srl I32:$Rs, u5_0ImmPred:$u5), 1),
+ (I1toI32 (S2_tstbit_i I32:$Rs, imm:$u5))>;
+def: Pat<(and (srl I64:$Rss, IsULE<32,31>:$u6), 1),
+ (ToZext64 (I1toI32 (S2_tstbit_i (LoReg $Rss), imm:$u6)))>;
+def: Pat<(and (srl I64:$Rss, IsUGT<32,31>:$u6), 1),
+ (ToZext64 (I1toI32 (S2_tstbit_i (HiReg $Rss), (UDEC32 $u6))))>;
+
+// Same as above for the inverted bit: (~(Rs >> n)) & 1 uses ntstbit.
+def: Pat<(and (not (srl I32:$Rs, u5_0ImmPred:$u5)), 1),
+ (I1toI32 (S4_ntstbit_i I32:$Rs, imm:$u5))>;
+def: Pat<(and (not (srl I64:$Rss, IsULE<32,31>:$u6)), 1),
+ (ToZext64 (I1toI32 (S4_ntstbit_i (LoReg $Rss), imm:$u6)))>;
+def: Pat<(and (not (srl I64:$Rss, IsUGT<32,31>:$u6)), 1),
+ (ToZext64 (I1toI32 (S4_ntstbit_i (HiReg $Rss), (UDEC32 $u6))))>;
+
let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)),
(C2_bitsclri IntRegs:$Rs, imm:$u6)>;
@@ -1737,23 +1798,28 @@ def: Pat<(HexagonTSTBIT I32:$Rs, u5_0ImmPred:$u5),
def: Pat<(HexagonTSTBIT I32:$Rs, I32:$Rt),
(S2_tstbit_r I32:$Rs, I32:$Rt)>;
+// Add extra complexity to prefer these instructions over bitsset/bitsclr.
+// The reason is that tstbit/ntstbit can be folded into a compound instruction:
+// if ([!]tstbit(...)) jump ...
let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
- def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
- (S4_ntstbit_i I32:$Rs, imm:$u5)>;
+ def: Pat<(i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)),
+ (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
+ def: Pat<(i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)),
+ (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)),
(S4_ntstbit_r I32:$Rs, I32:$Rt)>;
+ def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
+ (S2_tstbit_r I32:$Rs, I32:$Rt)>;
}
-// Add extra complexity to prefer these instructions over bitsset/bitsclr.
-// The reason is that tstbit/ntstbit can be folded into a compound instruction:
-// if ([!]tstbit(...)) jump ...
-let AddedComplexity = 100 in
-def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
- (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
-
-let AddedComplexity = 100 in
-def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
- (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
+// Single-bit tests on a 64-bit value against a power-of-2 mask. The test is
+// done on one 32-bit half: LoReg for IsPow2_64L masks, HiReg for IsPow2_64H
+// masks (bit index adjusted with UDEC32).
+// The mask is a 64-bit immediate, so the bit index is always computed with
+// Log2_64; a 32-bit log would not see a bit set above bit 31.
+// NOTE(review): IsPow2_64L/IsPow2_64H presumably select masks whose set bit
+// is in the low/high 32 bits respectively; confirm against their definitions.
+def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64L:$u6), 0)),
+ (S4_ntstbit_i (LoReg $Rs), (Log2_64 $u6))>;
+def: Pat<(i1 (seteq (and I64:$Rs, IsPow2_64H:$u6), 0)),
+ (S4_ntstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>;
+def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64L:$u6), 0)),
+ (S2_tstbit_i (LoReg $Rs), (Log2_64 $u6))>;
+def: Pat<(i1 (setne (and I64:$Rs, IsPow2_64H:$u6), 0)),
+ (S2_tstbit_i (HiReg $Rs), (UDEC32 (i32 (Log2_64 $u6))))>;
// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
// represented as a compare against "value & 0xFF", which is an exact match
@@ -1773,10 +1839,18 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
let AddedComplexity = 100 in {
// Avoid A4_rcmp[n]eqi in these cases:
+ // Each source DAG appears exactly once in this block: the register forms
+ // (bit index in $Rt) are the pre-existing patterns, and the immediate
+ // forms (power-of-2 mask) are the only additions.
def: Pat<(i32 (zext (i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
(I1toI32 (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
+ def: Pat<(i32 (zext (i1 (seteq (and I32:$Rs, IsPow2_32:$u5), 0)))),
+ (I1toI32 (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5)))>;
+ def: Pat<(i32 (zext (i1 (setne (and I32:$Rs, IsPow2_32:$u5), 0)))),
+ (I1toI32 (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5)))>;
def: Pat<(i32 (zext (i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)))),
(I1toI32 (S4_ntstbit_r IntRegs:$Rs, IntRegs:$Rt))>;
}
// --(11) PIC ------------------------------------------------------------