1 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Define several functions to decode x86 specific shuffle semantics using
11 // constants from the constant pool.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "X86ShuffleDecodeConstantPool.h"
16 #include "Utils/X86ShuffleDecode.h"
17 #include "llvm/CodeGen/MachineValueType.h"
18 #include "llvm/IR/Constants.h"
19 
20 //===----------------------------------------------------------------------===//
21 //  Vector Mask Decoding
22 //===----------------------------------------------------------------------===//
23 
24 namespace llvm {
25 
DecodePSHUFBMask(const Constant * C,SmallVectorImpl<int> & ShuffleMask)26 void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
27   Type *MaskTy = C->getType();
28   // It is not an error for the PSHUFB mask to not be a vector of i8 because the
29   // constant pool uniques constants by their bit representation.
30   // e.g. the following take up the same space in the constant pool:
31   //   i128 -170141183420855150465331762880109871104
32   //
33   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
34   //
35   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
36   //              i32 -2147483648, i32 -2147483648>
37 
38 #ifndef NDEBUG
39   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
40   assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
41 #endif
42 
43   if (!MaskTy->isVectorTy())
44     return;
45   int NumElts = MaskTy->getVectorNumElements();
46 
47   Type *EltTy = MaskTy->getVectorElementType();
48   if (!EltTy->isIntegerTy())
49     return;
50 
51   // The shuffle mask requires a byte vector - decode cases with
52   // wider elements as well.
53   unsigned BitWidth = cast<IntegerType>(EltTy)->getBitWidth();
54   if ((BitWidth % 8) != 0)
55     return;
56 
57   int Scale = BitWidth / 8;
58   int NumBytes = NumElts * Scale;
59   ShuffleMask.reserve(NumBytes);
60 
61   for (int i = 0; i != NumElts; ++i) {
62     Constant *COp = C->getAggregateElement(i);
63     if (!COp) {
64       ShuffleMask.clear();
65       return;
66     } else if (isa<UndefValue>(COp)) {
67       ShuffleMask.append(Scale, SM_SentinelUndef);
68       continue;
69     }
70 
71     APInt APElt = cast<ConstantInt>(COp)->getValue();
72     for (int j = 0; j != Scale; ++j) {
73       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
74       // lane of the vector we're inside.
75       int Base = ((i * Scale) + j) & ~0xf;
76 
77       uint64_t Element = APElt.getLoBits(8).getZExtValue();
78       APElt = APElt.lshr(8);
79 
80       // If the high bit (7) of the byte is set, the element is zeroed.
81       if (Element & (1 << 7))
82         ShuffleMask.push_back(SM_SentinelZero);
83       else {
84         // Only the least significant 4 bits of the byte are used.
85         int Index = Base + (Element & 0xf);
86         ShuffleMask.push_back(Index);
87       }
88     }
89   }
90 
91   assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
92 }
93 
DecodeVPERMILPMask(const Constant * C,unsigned ElSize,SmallVectorImpl<int> & ShuffleMask)94 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
95                         SmallVectorImpl<int> &ShuffleMask) {
96   Type *MaskTy = C->getType();
97   // It is not an error for the PSHUFB mask to not be a vector of i8 because the
98   // constant pool uniques constants by their bit representation.
99   // e.g. the following take up the same space in the constant pool:
100   //   i128 -170141183420855150465331762880109871104
101   //
102   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
103   //
104   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
105   //              i32 -2147483648, i32 -2147483648>
106 
107   if (ElSize != 32 && ElSize != 64)
108     return;
109 
110   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
111   if (MaskTySize != 128 && MaskTySize != 256 && MaskTySize != 512)
112     return;
113 
114   // Only support vector types.
115   if (!MaskTy->isVectorTy())
116     return;
117 
118   // Make sure its an integer type.
119   Type *VecEltTy = MaskTy->getVectorElementType();
120   if (!VecEltTy->isIntegerTy())
121     return;
122 
123   // Support any element type from byte up to element size.
124   // This is necessary primarily because 64-bit elements get split to 32-bit
125   // in the constant pool on 32-bit target.
126   unsigned EltTySize = VecEltTy->getIntegerBitWidth();
127   if (EltTySize < 8 || EltTySize > ElSize)
128     return;
129 
130   unsigned NumElements = MaskTySize / ElSize;
131   assert((NumElements == 2 || NumElements == 4 || NumElements == 8 ||
132           NumElements == 16) &&
133          "Unexpected number of vector elements.");
134   ShuffleMask.reserve(NumElements);
135   unsigned NumElementsPerLane = 128 / ElSize;
136   unsigned Factor = ElSize / EltTySize;
137 
138   for (unsigned i = 0; i < NumElements; ++i) {
139     Constant *COp = C->getAggregateElement(i * Factor);
140     if (!COp) {
141       ShuffleMask.clear();
142       return;
143     } else if (isa<UndefValue>(COp)) {
144       ShuffleMask.push_back(SM_SentinelUndef);
145       continue;
146     }
147     int Index = i & ~(NumElementsPerLane - 1);
148     uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
149     if (ElSize == 64)
150       Index += (Element >> 1) & 0x1;
151     else
152       Index += Element & 0x3;
153     ShuffleMask.push_back(Index);
154   }
155 
156   // TODO: Handle funny-looking vectors too.
157 }
158 
DecodeVPERMIL2PMask(const Constant * C,unsigned M2Z,unsigned ElSize,SmallVectorImpl<int> & ShuffleMask)159 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
160                          SmallVectorImpl<int> &ShuffleMask) {
161   Type *MaskTy = C->getType();
162 
163   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
164   if (MaskTySize != 128 && MaskTySize != 256)
165     return;
166 
167   // Only support vector types.
168   if (!MaskTy->isVectorTy())
169     return;
170 
171   // Make sure its an integer type.
172   Type *VecEltTy = MaskTy->getVectorElementType();
173   if (!VecEltTy->isIntegerTy())
174     return;
175 
176   // Support any element type from byte up to element size.
177   // This is necessary primarily because 64-bit elements get split to 32-bit
178   // in the constant pool on 32-bit target.
179   unsigned EltTySize = VecEltTy->getIntegerBitWidth();
180   if (EltTySize < 8 || EltTySize > ElSize)
181     return;
182 
183   unsigned NumElements = MaskTySize / ElSize;
184   assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
185          "Unexpected number of vector elements.");
186   ShuffleMask.reserve(NumElements);
187   unsigned NumElementsPerLane = 128 / ElSize;
188   unsigned Factor = ElSize / EltTySize;
189 
190   for (unsigned i = 0; i < NumElements; ++i) {
191     Constant *COp = C->getAggregateElement(i * Factor);
192     if (!COp) {
193       ShuffleMask.clear();
194       return;
195     } else if (isa<UndefValue>(COp)) {
196       ShuffleMask.push_back(SM_SentinelUndef);
197       continue;
198     }
199 
200     // VPERMIL2 Operation.
201     // Bits[3] - Match Bit.
202     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
203     // Bits[2:0] - (Per Lane) PS Shuffle Mask.
204     uint64_t Selector = cast<ConstantInt>(COp)->getZExtValue();
205     unsigned MatchBit = (Selector >> 3) & 0x1;
206 
207     // M2Z[0:1]     MatchBit
208     //   0Xb           X        Source selected by Selector index.
209     //   10b           0        Source selected by Selector index.
210     //   10b           1        Zero.
211     //   11b           0        Zero.
212     //   11b           1        Source selected by Selector index.
213     if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
214       ShuffleMask.push_back(SM_SentinelZero);
215       continue;
216     }
217 
218     int Index = i & ~(NumElementsPerLane - 1);
219     if (ElSize == 64)
220       Index += (Selector >> 1) & 0x1;
221     else
222       Index += Selector & 0x3;
223 
224     int Src = (Selector >> 2) & 0x1;
225     Index += Src * NumElements;
226     ShuffleMask.push_back(Index);
227   }
228 
229   // TODO: Handle funny-looking vectors too.
230 }
231 
DecodeVPPERMMask(const Constant * C,SmallVectorImpl<int> & ShuffleMask)232 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
233   Type *MaskTy = C->getType();
234   assert(MaskTy->getPrimitiveSizeInBits() == 128);
235 
236   // Only support vector types.
237   if (!MaskTy->isVectorTy())
238     return;
239 
240   // Make sure its an integer type.
241   Type *VecEltTy = MaskTy->getVectorElementType();
242   if (!VecEltTy->isIntegerTy())
243     return;
244 
245   // The shuffle mask requires a byte vector - decode cases with
246   // wider elements as well.
247   unsigned BitWidth = cast<IntegerType>(VecEltTy)->getBitWidth();
248   if ((BitWidth % 8) != 0)
249     return;
250 
251   int NumElts = MaskTy->getVectorNumElements();
252   int Scale = BitWidth / 8;
253   int NumBytes = NumElts * Scale;
254   ShuffleMask.reserve(NumBytes);
255 
256   for (int i = 0; i != NumElts; ++i) {
257     Constant *COp = C->getAggregateElement(i);
258     if (!COp) {
259       ShuffleMask.clear();
260       return;
261     } else if (isa<UndefValue>(COp)) {
262       ShuffleMask.append(Scale, SM_SentinelUndef);
263       continue;
264     }
265 
266     // VPPERM Operation
267     // Bits[4:0] - Byte Index (0 - 31)
268     // Bits[7:5] - Permute Operation
269     //
270     // Permute Operation:
271     // 0 - Source byte (no logical operation).
272     // 1 - Invert source byte.
273     // 2 - Bit reverse of source byte.
274     // 3 - Bit reverse of inverted source byte.
275     // 4 - 00h (zero - fill).
276     // 5 - FFh (ones - fill).
277     // 6 - Most significant bit of source byte replicated in all bit positions.
278     // 7 - Invert most significant bit of source byte and replicate in all bit positions.
279     APInt MaskElt = cast<ConstantInt>(COp)->getValue();
280     for (int j = 0; j != Scale; ++j) {
281       APInt Index = MaskElt.getLoBits(5);
282       APInt PermuteOp = MaskElt.lshr(5).getLoBits(3);
283       MaskElt = MaskElt.lshr(8);
284 
285       if (PermuteOp == 4) {
286         ShuffleMask.push_back(SM_SentinelZero);
287         continue;
288       }
289       if (PermuteOp != 0) {
290         ShuffleMask.clear();
291         return;
292       }
293       ShuffleMask.push_back((int)Index.getZExtValue());
294     }
295   }
296 
297   assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
298 }
299 
DecodeVPERMVMask(const Constant * C,MVT VT,SmallVectorImpl<int> & ShuffleMask)300 void DecodeVPERMVMask(const Constant *C, MVT VT,
301                       SmallVectorImpl<int> &ShuffleMask) {
302   Type *MaskTy = C->getType();
303   if (MaskTy->isVectorTy()) {
304     unsigned NumElements = MaskTy->getVectorNumElements();
305     if (NumElements == VT.getVectorNumElements()) {
306       unsigned EltMaskSize = Log2_64(NumElements);
307       for (unsigned i = 0; i < NumElements; ++i) {
308         Constant *COp = C->getAggregateElement(i);
309         if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
310           ShuffleMask.clear();
311           return;
312         }
313         if (isa<UndefValue>(COp))
314           ShuffleMask.push_back(SM_SentinelUndef);
315         else {
316           APInt Element = cast<ConstantInt>(COp)->getValue();
317           Element = Element.getLoBits(EltMaskSize);
318           ShuffleMask.push_back(Element.getZExtValue());
319         }
320       }
321     }
322     return;
323   }
324   // Scalar value; just broadcast it
325   if (!isa<ConstantInt>(C))
326     return;
327   uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
328   int NumElements = VT.getVectorNumElements();
329   Element &= (1 << NumElements) - 1;
330   for (int i = 0; i < NumElements; ++i)
331     ShuffleMask.push_back(Element);
332 }
333 
DecodeVPERMV3Mask(const Constant * C,MVT VT,SmallVectorImpl<int> & ShuffleMask)334 void DecodeVPERMV3Mask(const Constant *C, MVT VT,
335                        SmallVectorImpl<int> &ShuffleMask) {
336   Type *MaskTy = C->getType();
337   unsigned NumElements = MaskTy->getVectorNumElements();
338   if (NumElements == VT.getVectorNumElements()) {
339     unsigned EltMaskSize = Log2_64(NumElements * 2);
340     for (unsigned i = 0; i < NumElements; ++i) {
341       Constant *COp = C->getAggregateElement(i);
342       if (!COp) {
343         ShuffleMask.clear();
344         return;
345       }
346       if (isa<UndefValue>(COp))
347         ShuffleMask.push_back(SM_SentinelUndef);
348       else {
349         APInt Element = cast<ConstantInt>(COp)->getValue();
350         Element = Element.getLoBits(EltMaskSize);
351         ShuffleMask.push_back(Element.getZExtValue());
352       }
353     }
354   }
355 }
356 } // llvm namespace
357