aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/utils/PerfectShuffle/PerfectShuffle.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/utils/PerfectShuffle/PerfectShuffle.cpp')
-rw-r--r--contrib/llvm/utils/PerfectShuffle/PerfectShuffle.cpp571
1 files changed, 0 insertions, 571 deletions
diff --git a/contrib/llvm/utils/PerfectShuffle/PerfectShuffle.cpp b/contrib/llvm/utils/PerfectShuffle/PerfectShuffle.cpp
deleted file mode 100644
index b94a7d326d19..000000000000
--- a/contrib/llvm/utils/PerfectShuffle/PerfectShuffle.cpp
+++ /dev/null
@@ -1,571 +0,0 @@
-//===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file computes an optimal sequence of instructions for doing all shuffles
-// of two 4-element vectors. With a release build and when configured to emit
-// an altivec instruction table, this takes about 30s to run on a 2.7Ghz
-// PowerPC G5.
-//
-//===----------------------------------------------------------------------===//
-
-#include <iostream>
-#include <vector>
-#include <cassert>
-#include <cstdlib>
-struct Operator;
-
-// Masks are 4-nibble hex numbers. Values 0-7 in any nibble means that it takes
-// an element from that value of the input vectors. A value of 8 means the
-// entry is undefined.
-
-// Mask manipulation functions.
-static inline unsigned short MakeMask(unsigned V0, unsigned V1,
- unsigned V2, unsigned V3) {
- return (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) | (V3 << (0*4));
-}
-
-/// getMaskElt - Return element N of the specified mask.
-static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
- return (Mask >> ((3-Elt)*4)) & 0xF;
-}
-
-static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
- unsigned FieldShift = ((3-Elt)*4);
- return (Mask & ~(0xF << FieldShift)) | (NewVal << FieldShift);
-}
-
-// Reject elements where the values are 9-15.
-static bool isValidMask(unsigned short Mask) {
- unsigned short UndefBits = Mask & 0x8888;
- return (Mask & ((UndefBits >> 1)|(UndefBits>>2)|(UndefBits>>3))) == 0;
-}
-
-/// hasUndefElements - Return true if any of the elements in the mask are undefs
-///
-static bool hasUndefElements(unsigned short Mask) {
- return (Mask & 0x8888) != 0;
-}
-
-/// isOnlyLHSMask - Return true if this mask only refers to its LHS, not
-/// including undef values..
-static bool isOnlyLHSMask(unsigned short Mask) {
- return (Mask & 0x4444) == 0;
-}
-
-/// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
-/// refer to the LHS only (for when one argument value is passed into the same
-/// function twice).
-#if 0
-static unsigned short getLHSOnlyMask(unsigned short Mask) {
- return Mask & 0xBBBB; // Keep only LHS and Undefs.
-}
-#endif
-
-/// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
-/// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
-static unsigned getCompressedMask(unsigned short Mask) {
- return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 +
- getMaskElt(Mask, 2)*9 + getMaskElt(Mask, 3);
-}
-
-static void PrintMask(unsigned i, std::ostream &OS) {
- OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0)))
- << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1)))
- << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2)))
- << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3)))
- << ">";
-}
-
-/// ShuffleVal - This represents a shufflevector operation.
-struct ShuffleVal {
- unsigned Cost; // Number of instrs used to generate this value.
- Operator *Op; // The Operation used to generate this value.
- unsigned short Arg0, Arg1; // Input operands for this value.
-
- ShuffleVal() : Cost(1000000) {}
-};
-
-
-/// ShufTab - This is the actual shuffle table that we are trying to generate.
-///
-static ShuffleVal ShufTab[65536];
-
-/// TheOperators - All of the operators that this target supports.
-static std::vector<Operator*> TheOperators;
-
-/// Operator - This is a vector operation that is available for use.
-struct Operator {
- unsigned short ShuffleMask;
- unsigned short OpNum;
- const char *Name;
- unsigned Cost;
-
- Operator(unsigned short shufflemask, const char *name, unsigned opnum,
- unsigned cost = 1)
- : ShuffleMask(shufflemask), OpNum(opnum), Name(name), Cost(cost) {
- TheOperators.push_back(this);
- }
- ~Operator() {
- assert(TheOperators.back() == this);
- TheOperators.pop_back();
- }
-
- bool isOnlyLHSOperator() const {
- return isOnlyLHSMask(ShuffleMask);
- }
-
- const char *getName() const { return Name; }
- unsigned getCost() const { return Cost; }
-
- unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
- // Extract the elements from LHSMask and RHSMask, as appropriate.
- unsigned Result = 0;
- for (unsigned i = 0; i != 4; ++i) {
- unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF;
- unsigned ResElt;
- if (SrcElt < 4)
- ResElt = getMaskElt(LHSMask, SrcElt);
- else if (SrcElt < 8)
- ResElt = getMaskElt(RHSMask, SrcElt-4);
- else {
- assert(SrcElt == 8 && "Bad src elt!");
- ResElt = 8;
- }
- Result |= ResElt << (4*i);
- }
- return Result;
- }
-};
-
-static const char *getZeroCostOpName(unsigned short Op) {
- if (ShufTab[Op].Arg0 == 0x0123)
- return "LHS";
- else if (ShufTab[Op].Arg0 == 0x4567)
- return "RHS";
- else {
- assert(0 && "bad zero cost operation");
- abort();
- }
-}
-
-static void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
- unsigned short ThisOp = Vals[ValNo];
- std::cerr << "t" << ValNo;
- PrintMask(ThisOp, std::cerr);
- std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
-
- if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
- std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
- PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
- } else {
- // Figure out what tmp # it is.
- for (unsigned i = 0; ; ++i)
- if (Vals[i] == ShufTab[ThisOp].Arg0) {
- std::cerr << "t" << i;
- break;
- }
- }
-
- if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
- std::cerr << ", ";
- if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
- std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1);
- PrintMask(ShufTab[ThisOp].Arg1, std::cerr);
- } else {
- // Figure out what tmp # it is.
- for (unsigned i = 0; ; ++i)
- if (Vals[i] == ShufTab[ThisOp].Arg1) {
- std::cerr << "t" << i;
- break;
- }
- }
- }
- std::cerr << ") ";
-}
-
-static unsigned getNumEntered() {
- unsigned Count = 0;
- for (unsigned i = 0; i != 65536; ++i)
- Count += ShufTab[i].Cost < 100;
- return Count;
-}
-
-static void EvaluateOps(unsigned short Elt, unsigned short Vals[],
- unsigned &NumVals) {
- if (ShufTab[Elt].Cost == 0) return;
-
- // If this value has already been evaluated, it is free. FIXME: match undefs.
- for (unsigned i = 0, e = NumVals; i != e; ++i)
- if (Vals[i] == Elt) return;
-
- // Otherwise, get the operands of the value, then add it.
- unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
- if (ShufTab[Arg0].Cost)
- EvaluateOps(Arg0, Vals, NumVals);
- if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
- EvaluateOps(Arg1, Vals, NumVals);
-
- Vals[NumVals++] = Elt;
-}
-
-
-int main() {
- // Seed the table with accesses to the LHS and RHS.
- ShufTab[0x0123].Cost = 0;
- ShufTab[0x0123].Op = 0;
- ShufTab[0x0123].Arg0 = 0x0123;
- ShufTab[0x4567].Cost = 0;
- ShufTab[0x4567].Op = 0;
- ShufTab[0x4567].Arg0 = 0x4567;
-
- // Seed the first-level of shuffles, shuffles whose inputs are the input to
- // the vectorshuffle operation.
- bool MadeChange = true;
- unsigned OpCount = 0;
- while (MadeChange) {
- MadeChange = false;
- ++OpCount;
- std::cerr << "Starting iteration #" << OpCount << " with "
- << getNumEntered() << " entries established.\n";
-
- // Scan the table for two reasons: First, compute the maximum cost of any
- // operation left in the table. Second, make sure that values with undefs
- // have the cheapest alternative that they match.
- unsigned MaxCost = ShufTab[0].Cost;
- for (unsigned i = 1; i != 0x8889; ++i) {
- if (!isValidMask(i)) continue;
- if (ShufTab[i].Cost > MaxCost)
- MaxCost = ShufTab[i].Cost;
-
- // If this value has an undef, make it be computed the cheapest possible
- // way of any of the things that it matches.
- if (hasUndefElements(i)) {
- // This code is a little bit tricky, so here's the idea: consider some
- // permutation, like 7u4u. To compute the lowest cost for 7u4u, we
- // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries. If
- // there are 3 undefs, the number rises to 729 entries we have to scan,
- // and for the 4 undef case, we have to scan the whole table.
- //
- // Instead of doing this huge amount of scanning, we process the table
- // entries *in order*, and use the fact that 'u' is 8, larger than any
- // valid index. Given an entry like 7u4u then, we only need to scan
- // 7[0-7]4u - 8 entries. We can get away with this, because we already
- // know that each of 704u, 714u, 724u, etc contain the minimum value of
- // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively.
- unsigned UndefIdx;
- if (i & 0x8000)
- UndefIdx = 0;
- else if (i & 0x0800)
- UndefIdx = 1;
- else if (i & 0x0080)
- UndefIdx = 2;
- else if (i & 0x0008)
- UndefIdx = 3;
- else
- abort();
-
- unsigned MinVal = i;
- unsigned MinCost = ShufTab[i].Cost;
-
- // Scan the 8 entries.
- for (unsigned j = 0; j != 8; ++j) {
- unsigned NewElt = setMaskElt(i, UndefIdx, j);
- if (ShufTab[NewElt].Cost < MinCost) {
- MinCost = ShufTab[NewElt].Cost;
- MinVal = NewElt;
- }
- }
-
- // If we found something cheaper than what was here before, use it.
- if (i != MinVal) {
- MadeChange = true;
- ShufTab[i] = ShufTab[MinVal];
- }
- }
- }
-
- for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
- if (!isValidMask(LHS)) continue;
- if (ShufTab[LHS].Cost > 1000) continue;
-
- // If nothing involving this operand could possibly be cheaper than what
- // we already have, don't consider it.
- if (ShufTab[LHS].Cost + 1 >= MaxCost)
- continue;
-
- for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
- Operator *Op = TheOperators[opnum];
-
- // Evaluate op(LHS,LHS)
- unsigned ResultMask = Op->getTransformedMask(LHS, LHS);
-
- unsigned Cost = ShufTab[LHS].Cost + Op->getCost();
- if (Cost < ShufTab[ResultMask].Cost) {
- ShufTab[ResultMask].Cost = Cost;
- ShufTab[ResultMask].Op = Op;
- ShufTab[ResultMask].Arg0 = LHS;
- ShufTab[ResultMask].Arg1 = LHS;
- MadeChange = true;
- }
-
- // If this is a two input instruction, include the op(x,y) cases. If
- // this is a one input instruction, skip this.
- if (Op->isOnlyLHSOperator()) continue;
-
- for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
- if (!isValidMask(RHS)) continue;
- if (ShufTab[RHS].Cost > 1000) continue;
-
- // If nothing involving this operand could possibly be cheaper than
- // what we already have, don't consider it.
- if (ShufTab[RHS].Cost + 1 >= MaxCost)
- continue;
-
-
- // Evaluate op(LHS,RHS)
- unsigned ResultMask = Op->getTransformedMask(LHS, RHS);
-
- if (ShufTab[ResultMask].Cost <= OpCount ||
- ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
- ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
- continue;
-
- // Figure out the cost to evaluate this, knowing that CSE's only need
- // to be evaluated once.
- unsigned short Vals[30];
- unsigned NumVals = 0;
- EvaluateOps(LHS, Vals, NumVals);
- EvaluateOps(RHS, Vals, NumVals);
-
- unsigned Cost = NumVals + Op->getCost();
- if (Cost < ShufTab[ResultMask].Cost) {
- ShufTab[ResultMask].Cost = Cost;
- ShufTab[ResultMask].Op = Op;
- ShufTab[ResultMask].Arg0 = LHS;
- ShufTab[ResultMask].Arg1 = RHS;
- MadeChange = true;
- }
- }
- }
- }
- }
-
- std::cerr << "Finished Table has " << getNumEntered()
- << " entries established.\n";
-
- unsigned CostArray[10] = { 0 };
-
- // Compute a cost histogram.
- for (unsigned i = 0; i != 65536; ++i) {
- if (!isValidMask(i)) continue;
- if (ShufTab[i].Cost > 9)
- ++CostArray[9];
- else
- ++CostArray[ShufTab[i].Cost];
- }
-
- for (unsigned i = 0; i != 9; ++i)
- if (CostArray[i])
- std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
- if (CostArray[9])
- std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
-
-
- // Build up the table to emit.
- std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
- std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n";
-
- for (unsigned i = 0; i != 0x8889; ++i) {
- if (!isValidMask(i)) continue;
-
- // CostSat - The cost of this operation saturated to two bits.
- unsigned CostSat = ShufTab[i].Cost;
- if (CostSat > 4) CostSat = 4;
- if (CostSat == 0) CostSat = 1;
- --CostSat; // Cost is now between 0-3.
-
- unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
- assert(OpNum < 16 && "Too few bits to encode operation!");
-
- unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
- unsigned RHS = getCompressedMask(ShufTab[i].Arg1);
-
- // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
- // LHS, and 13 bits of RHS = 32 bits.
- unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;
-
- std::cout << " " << Val << "U,\t// ";
- PrintMask(i, std::cout);
- std::cout << ": Cost " << ShufTab[i].Cost;
- std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
- std::cout << " ";
- if (ShufTab[ShufTab[i].Arg0].Cost == 0) {
- std::cout << getZeroCostOpName(ShufTab[i].Arg0);
- } else {
- PrintMask(ShufTab[i].Arg0, std::cout);
- }
-
- if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) {
- std::cout << ", ";
- if (ShufTab[ShufTab[i].Arg1].Cost == 0) {
- std::cout << getZeroCostOpName(ShufTab[i].Arg1);
- } else {
- PrintMask(ShufTab[i].Arg1, std::cout);
- }
- }
- std::cout << "\n";
- }
- std::cout << " 0\n};\n";
-
- if (0) {
- // Print out the table.
- for (unsigned i = 0; i != 0x8889; ++i) {
- if (!isValidMask(i)) continue;
- if (ShufTab[i].Cost < 1000) {
- PrintMask(i, std::cerr);
- std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
-
- unsigned short Vals[30];
- unsigned NumVals = 0;
- EvaluateOps(i, Vals, NumVals);
-
- for (unsigned j = 0, e = NumVals; j != e; ++j)
- PrintOperation(j, Vals);
- std::cerr << "\n";
- }
- }
- }
-}
-
-
-#ifdef GENERATE_ALTIVEC
-
-///===---------------------------------------------------------------------===//
-/// The altivec instruction definitions. This is the altivec-specific part of
-/// this file.
-///===---------------------------------------------------------------------===//
-
-// Note that the opcode numbers here must match those in the PPC backend.
-enum {
- OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
- OP_VMRGHW,
- OP_VMRGLW,
- OP_VSPLTISW0,
- OP_VSPLTISW1,
- OP_VSPLTISW2,
- OP_VSPLTISW3,
- OP_VSLDOI4,
- OP_VSLDOI8,
- OP_VSLDOI12
-};
-
-struct vmrghw : public Operator {
- vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {}
-} the_vmrghw;
-
-struct vmrglw : public Operator {
- vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {}
-} the_vmrglw;
-
-template<unsigned Elt>
-struct vspltisw : public Operator {
- vspltisw(const char *N, unsigned Opc)
- : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
-};
-
-vspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0);
-vspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1);
-vspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2);
-vspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3);
-
-template<unsigned N>
-struct vsldoi : public Operator {
- vsldoi(const char *Name, unsigned Opc)
- : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
- }
-};
-
-vsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4);
-vsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
-vsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
-
-#endif
-
-#define GENERATE_NEON
-
-#ifdef GENERATE_NEON
-enum {
- OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
- OP_VREV,
- OP_VDUP0,
- OP_VDUP1,
- OP_VDUP2,
- OP_VDUP3,
- OP_VEXT1,
- OP_VEXT2,
- OP_VEXT3,
- OP_VUZPL, // VUZP, left result
- OP_VUZPR, // VUZP, right result
- OP_VZIPL, // VZIP, left result
- OP_VZIPR, // VZIP, right result
- OP_VTRNL, // VTRN, left result
- OP_VTRNR // VTRN, right result
-};
-
-struct vrev : public Operator {
- vrev() : Operator(0x1032, "vrev", OP_VREV) {}
-} the_vrev;
-
-template<unsigned Elt>
-struct vdup : public Operator {
- vdup(const char *N, unsigned Opc)
- : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
-};
-
-vdup<0> the_vdup0("vdup0", OP_VDUP0);
-vdup<1> the_vdup1("vdup1", OP_VDUP1);
-vdup<2> the_vdup2("vdup2", OP_VDUP2);
-vdup<3> the_vdup3("vdup3", OP_VDUP3);
-
-template<unsigned N>
-struct vext : public Operator {
- vext(const char *Name, unsigned Opc)
- : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
- }
-};
-
-vext<1> the_vext1("vext1", OP_VEXT1);
-vext<2> the_vext2("vext2", OP_VEXT2);
-vext<3> the_vext3("vext3", OP_VEXT3);
-
-struct vuzpl : public Operator {
- vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {}
-} the_vuzpl;
-
-struct vuzpr : public Operator {
- vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {}
-} the_vuzpr;
-
-struct vzipl : public Operator {
- vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {}
-} the_vzipl;
-
-struct vzipr : public Operator {
- vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {}
-} the_vzipr;
-
-struct vtrnl : public Operator {
- vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {}
-} the_vtrnl;
-
-struct vtrnr : public Operator {
- vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {}
-} the_vtrnr;
-
-#endif