• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /****************************************************************************
2   * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3   *
4   * Permission is hereby granted, free of charge, to any person obtaining a
5   * copy of this software and associated documentation files (the "Software"),
6   * to deal in the Software without restriction, including without limitation
7   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8   * and/or sell copies of the Software, and to permit persons to whom the
9   * Software is furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice (including the next
12   * paragraph) shall be included in all copies or substantial portions of the
13   * Software.
14   *
15   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21   * IN THE SOFTWARE.
22   *
23   * @file builder_misc.cpp
24   *
25   * @brief Implementation for miscellaneous builder functions
26   *
27   * Notes:
28   *
29   ******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "common/rdtsc_buckets.h"

#include <cstdarg>
#include <cstring>
35  
36  extern "C" void CallPrint(const char* fmt, ...);
37  
38  namespace SwrJit
39  {
//////////////////////////////////////////////////////////////////////////
/// @brief Convert an IEEE 754 32-bit single precision float to a
///        16-bit float with 5 exponent bits and 10 mantissa bits.
///
/// Conversion rules implemented below:
///  - NaN            -> 0xFE00 (sign bit forced on)
///  - +/-Inf         -> +/-Inf
///  - too large      -> largest finite half (0x7BFF with the input's sign)
///  - half denormals -> mantissa truncated
///  - too small      -> signed zero
///  - otherwise      -> mantissa truncated, except that it is rounded up when
///                      all 13 discarded bits are set
/// @param val - 32-bit float
/// @return the 16-bit float encoded in a uint16_t
/// @todo Maybe move this outside of this file into a header?
static uint16_t ConvertFloat32ToFloat16(float val)
{
    // Read the raw bits with memcpy; dereferencing a casted pointer violates
    // the strict aliasing rules.
    uint32_t uf = 0;
    static_assert(sizeof(uf) == sizeof(val), "float is expected to be 32 bits wide");
    std::memcpy(&uf, &val, sizeof(uf));

    // Extract the sign, exponent, and mantissa
    uint32_t sign = (uf & 0x80000000u) >> 31;
    uint32_t exp  = (uf & 0x7F800000u) >> 23;
    uint32_t mant = uf & 0x007FFFFFu;

    // Largest finite half: exponent 0x1E, mantissa all ones.
    const uint32_t maxExp  = 0x1E;
    const uint32_t maxMant = 0x3FF;

    if (std::isnan(val))
    {
        exp  = 0x1F;
        mant = 0x200;
        sign = 1; // set the sign bit for NANs
    }
    else if (std::isinf(val))
    {
        exp  = 0x1F;
        mant = 0x0;
    }
    else if (exp > (0x70 + maxExp)) // Too big to represent -> max representable value
    {
        exp  = maxExp;
        mant = maxMant;
    }
    else if ((exp <= 0x70) && (exp >= 0x66)) // It's a denorm
    {
        // Make the implicit leading one explicit, then shift it into place:
        // one bit per exponent step up to 0x70, plus the usual 13 bits that
        // separate a 23-bit mantissa from a 10-bit one.
        mant = (mant | 0x00800000u) >> ((0x71u - exp) + 13u);
        exp  = 0;
    }
    else if (exp < 0x66) // Too small to represent -> Zero
    {
        exp  = 0;
        mant = 0;
    }
    else
    {
        // Saves bits that will be shifted off for rounding
        const uint32_t roundBits = mant & 0x1FFFu;
        // convert exponent and mantissa to 16 bit format
        exp  = exp - 0x70;
        mant = mant >> 13;

        // Essentially RTZ, but round up if off by only 1 lsb
        if (roundBits == 0x1FFFu)
        {
            mant++;
            // check for overflow
            if ((mant & 0xC00u) != 0)
                exp++;
            // make sure only the needed bits are used
            mant &= 0x3FF;

            // A carry out of the largest finite value must not turn a finite
            // input into infinity; saturate like the "too big" case above.
            if (exp > maxExp)
            {
                exp  = maxExp;
                mant = maxMant;
            }
        }
    }

    const uint32_t packed = (sign << 15) | (exp << 10) | mant;
    return static_cast<uint16_t>(packed);
}
110  
C(bool i)111      Constant* Builder::C(bool i) { return ConstantInt::get(IRB()->getInt1Ty(), (i ? 1 : 0)); }
112  
C(char i)113      Constant* Builder::C(char i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
114  
C(uint8_t i)115      Constant* Builder::C(uint8_t i) { return ConstantInt::get(IRB()->getInt8Ty(), i); }
116  
C(int i)117      Constant* Builder::C(int i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
118  
C(int64_t i)119      Constant* Builder::C(int64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
120  
C(uint16_t i)121      Constant* Builder::C(uint16_t i) { return ConstantInt::get(mInt16Ty, i); }
122  
C(uint32_t i)123      Constant* Builder::C(uint32_t i) { return ConstantInt::get(IRB()->getInt32Ty(), i); }
124  
C(uint64_t i)125      Constant* Builder::C(uint64_t i) { return ConstantInt::get(IRB()->getInt64Ty(), i); }
126  
C(float i)127      Constant* Builder::C(float i) { return ConstantFP::get(IRB()->getFloatTy(), i); }
128  
PRED(bool pred)129      Constant* Builder::PRED(bool pred)
130      {
131          return ConstantInt::get(IRB()->getInt1Ty(), (pred ? 1 : 0));
132      }
133  
VIMMED1(uint64_t i)134      Value* Builder::VIMMED1(uint64_t i)
135      {
136  #if LLVM_VERSION_MAJOR <= 10
137          return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
138  #elif LLVM_VERSION_MAJOR == 11
139          return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
140  #else
141          return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
142  #endif
143      }
144  
VIMMED1_16(uint64_t i)145      Value* Builder::VIMMED1_16(uint64_t i)
146      {
147  #if LLVM_VERSION_MAJOR <= 10
148          return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
149  #elif LLVM_VERSION_MAJOR == 11
150          return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
151  #else
152          return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
153  #endif
154      }
155  
VIMMED1(int i)156      Value* Builder::VIMMED1(int i)
157      {
158  #if LLVM_VERSION_MAJOR <= 10
159          return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
160  #elif LLVM_VERSION_MAJOR == 11
161          return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
162  #else
163          return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
164  #endif
165      }
166  
VIMMED1_16(int i)167      Value* Builder::VIMMED1_16(int i)
168      {
169  #if LLVM_VERSION_MAJOR <= 10
170          return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
171  #elif LLVM_VERSION_MAJOR == 11
172          return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
173  #else
174          return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
175  #endif
176      }
177  
VIMMED1(uint32_t i)178      Value* Builder::VIMMED1(uint32_t i)
179      {
180  #if LLVM_VERSION_MAJOR <= 10
181          return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
182  #elif LLVM_VERSION_MAJOR == 11
183          return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
184  #else
185          return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
186  #endif
187      }
188  
VIMMED1_16(uint32_t i)189      Value* Builder::VIMMED1_16(uint32_t i)
190      {
191  #if LLVM_VERSION_MAJOR <= 10
192          return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
193  #elif LLVM_VERSION_MAJOR == 11
194          return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
195  #else
196          return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
197  #endif
198      }
199  
VIMMED1(float i)200      Value* Builder::VIMMED1(float i)
201      {
202  #if LLVM_VERSION_MAJOR <= 10
203          return ConstantVector::getSplat(mVWidth, cast<ConstantFP>(C(i)));
204  #elif LLVM_VERSION_MAJOR == 11
205          return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantFP>(C(i)));
206  #else
207          return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantFP>(C(i)));
208  #endif
209      }
210  
VIMMED1_16(float i)211      Value* Builder::VIMMED1_16(float i)
212      {
213  #if LLVM_VERSION_MAJOR <= 10
214          return ConstantVector::getSplat(mVWidth16, cast<ConstantFP>(C(i)));
215  #elif LLVM_VERSION_MAJOR == 11
216          return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantFP>(C(i)));
217  #else
218          return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantFP>(C(i)));
219  #endif
220      }
221  
VIMMED1(bool i)222      Value* Builder::VIMMED1(bool i)
223      {
224  #if LLVM_VERSION_MAJOR <= 10
225          return ConstantVector::getSplat(mVWidth, cast<ConstantInt>(C(i)));
226  #elif LLVM_VERSION_MAJOR == 11
227          return ConstantVector::getSplat(ElementCount(mVWidth, false), cast<ConstantInt>(C(i)));
228  #else
229          return ConstantVector::getSplat(ElementCount::get(mVWidth, false), cast<ConstantInt>(C(i)));
230  #endif
231      }
232  
VIMMED1_16(bool i)233      Value* Builder::VIMMED1_16(bool i)
234      {
235  #if LLVM_VERSION_MAJOR <= 10
236          return ConstantVector::getSplat(mVWidth16, cast<ConstantInt>(C(i)));
237  #elif LLVM_VERSION_MAJOR == 11
238          return ConstantVector::getSplat(ElementCount(mVWidth16, false), cast<ConstantInt>(C(i)));
239  #else
240          return ConstantVector::getSplat(ElementCount::get(mVWidth16, false), cast<ConstantInt>(C(i)));
241  #endif
242      }
243  
VUNDEF_IPTR()244      Value* Builder::VUNDEF_IPTR() { return UndefValue::get(getVectorType(mInt32PtrTy, mVWidth)); }
245  
VUNDEF(Type * t)246      Value* Builder::VUNDEF(Type* t) { return UndefValue::get(getVectorType(t, mVWidth)); }
247  
VUNDEF_I()248      Value* Builder::VUNDEF_I() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth)); }
249  
VUNDEF_I_16()250      Value* Builder::VUNDEF_I_16() { return UndefValue::get(getVectorType(mInt32Ty, mVWidth16)); }
251  
VUNDEF_F()252      Value* Builder::VUNDEF_F() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth)); }
253  
VUNDEF_F_16()254      Value* Builder::VUNDEF_F_16() { return UndefValue::get(getVectorType(mFP32Ty, mVWidth16)); }
255  
VUNDEF(Type * ty,uint32_t size)256      Value* Builder::VUNDEF(Type* ty, uint32_t size)
257      {
258          return UndefValue::get(getVectorType(ty, size));
259      }
260  
VBROADCAST(Value * src,const llvm::Twine & name)261      Value* Builder::VBROADCAST(Value* src, const llvm::Twine& name)
262      {
263          // check if src is already a vector
264          if (src->getType()->isVectorTy())
265          {
266              return src;
267          }
268  
269          return VECTOR_SPLAT(mVWidth, src, name);
270      }
271  
VBROADCAST_16(Value * src)272      Value* Builder::VBROADCAST_16(Value* src)
273      {
274          // check if src is already a vector
275          if (src->getType()->isVectorTy())
276          {
277              return src;
278          }
279  
280          return VECTOR_SPLAT(mVWidth16, src);
281      }
282  
IMMED(Value * v)283      uint32_t Builder::IMMED(Value* v)
284      {
285          SWR_ASSERT(isa<ConstantInt>(v));
286          ConstantInt* pValConst = cast<ConstantInt>(v);
287          return pValConst->getZExtValue();
288      }
289  
S_IMMED(Value * v)290      int32_t Builder::S_IMMED(Value* v)
291      {
292          SWR_ASSERT(isa<ConstantInt>(v));
293          ConstantInt* pValConst = cast<ConstantInt>(v);
294          return pValConst->getSExtValue();
295      }
296  
CALL(Value * Callee,const std::initializer_list<Value * > & argsList,const llvm::Twine & name)297      CallInst* Builder::CALL(Value*                               Callee,
298                              const std::initializer_list<Value*>& argsList,
299                              const llvm::Twine&                   name)
300      {
301          std::vector<Value*> args;
302          for (auto arg : argsList)
303              args.push_back(arg);
304  #if LLVM_VERSION_MAJOR >= 11
305          // see comment to CALLA(Callee) function in the header
306          return CALLA(FunctionCallee(cast<Function>(Callee)), args, name);
307  #else
308          return CALLA(Callee, args, name);
309  #endif
310      }
311  
CALL(Value * Callee,Value * arg)312      CallInst* Builder::CALL(Value* Callee, Value* arg)
313      {
314          std::vector<Value*> args;
315          args.push_back(arg);
316  #if LLVM_VERSION_MAJOR >= 11
317          // see comment to CALLA(Callee) function in the header
318          return CALLA(FunctionCallee(cast<Function>(Callee)), args);
319  #else
320          return CALLA(Callee, args);
321  #endif
322      }
323  
CALL2(Value * Callee,Value * arg1,Value * arg2)324      CallInst* Builder::CALL2(Value* Callee, Value* arg1, Value* arg2)
325      {
326          std::vector<Value*> args;
327          args.push_back(arg1);
328          args.push_back(arg2);
329  #if LLVM_VERSION_MAJOR >= 11
330          // see comment to CALLA(Callee) function in the header
331          return CALLA(FunctionCallee(cast<Function>(Callee)), args);
332  #else
333          return CALLA(Callee, args);
334  #endif
335      }
336  
CALL3(Value * Callee,Value * arg1,Value * arg2,Value * arg3)337      CallInst* Builder::CALL3(Value* Callee, Value* arg1, Value* arg2, Value* arg3)
338      {
339          std::vector<Value*> args;
340          args.push_back(arg1);
341          args.push_back(arg2);
342          args.push_back(arg3);
343  #if LLVM_VERSION_MAJOR >= 11
344          // see comment to CALLA(Callee) function in the header
345          return CALLA(FunctionCallee(cast<Function>(Callee)), args);
346  #else
347          return CALLA(Callee, args);
348  #endif
349      }
350  
VRCP(Value * va,const llvm::Twine & name)351      Value* Builder::VRCP(Value* va, const llvm::Twine& name)
352      {
353          return FDIV(VIMMED1(1.0f), va, name); // 1 / a
354      }
355  
VPLANEPS(Value * vA,Value * vB,Value * vC,Value * & vX,Value * & vY)356      Value* Builder::VPLANEPS(Value* vA, Value* vB, Value* vC, Value*& vX, Value*& vY)
357      {
358          Value* vOut = FMADDPS(vA, vX, vC);
359          vOut        = FMADDPS(vB, vY, vOut);
360          return vOut;
361      }
362  
363      //////////////////////////////////////////////////////////////////////////
364      /// @brief insert a JIT call to CallPrint
365      /// - outputs formatted string to both stdout and VS output window
366      /// - DEBUG builds only
367      /// Usage example:
368      ///   PRINT("index %d = 0x%p\n",{C(lane), pIndex});
369      ///   where C(lane) creates a constant value to print, and pIndex is the Value*
370      ///   result from a GEP, printing out the pointer to memory
371      /// @param printStr - constant string to print, which includes format specifiers
372      /// @param printArgs - initializer list of Value*'s to print to std out
PRINT(const std::string & printStr,const std::initializer_list<Value * > & printArgs)373      CallInst* Builder::PRINT(const std::string&                   printStr,
374                               const std::initializer_list<Value*>& printArgs)
375      {
376          // push the arguments to CallPrint into a vector
377          std::vector<Value*> printCallArgs;
378          // save room for the format string.  we still need to modify it for vectors
379          printCallArgs.resize(1);
380  
381          // search through the format string for special processing
382          size_t      pos = 0;
383          std::string tempStr(printStr);
384          pos    = tempStr.find('%', pos);
385          auto v = printArgs.begin();
386  
387          while ((pos != std::string::npos) && (v != printArgs.end()))
388          {
389              Value* pArg  = *v;
390              Type*  pType = pArg->getType();
391  
392              if (pType->isVectorTy())
393              {
394                  Type* pContainedType = pType->getContainedType(0);
395  #if LLVM_VERSION_MAJOR >= 11
396                  VectorType* pVectorType = cast<VectorType>(pType);
397  #endif
398                  if (toupper(tempStr[pos + 1]) == 'X')
399                  {
400                      tempStr[pos]     = '0';
401                      tempStr[pos + 1] = 'x';
402                      tempStr.insert(pos + 2, "%08X ");
403                      pos += 7;
404  
405                      printCallArgs.push_back(VEXTRACT(pArg, C(0)));
406  
407                      std::string vectorFormatStr;
408  #if LLVM_VERSION_MAJOR >= 11
409                      for (uint32_t i = 1; i < pVectorType->getNumElements(); ++i)
410  #else
411                      for (uint32_t i = 1; i < pType->getVectorNumElements(); ++i)
412  #endif
413                      {
414                          vectorFormatStr += "0x%08X ";
415                          printCallArgs.push_back(VEXTRACT(pArg, C(i)));
416                      }
417  
418                      tempStr.insert(pos, vectorFormatStr);
419                      pos += vectorFormatStr.size();
420                  }
421                  else if ((tempStr[pos + 1] == 'f') && (pContainedType->isFloatTy()))
422                  {
423                      uint32_t i = 0;
424  #if LLVM_VERSION_MAJOR >= 11
425                      for (; i < pVectorType->getNumElements() - 1; i++)
426  #else
427                      for (; i < pType->getVectorNumElements() - 1; i++)
428  #endif
429                      {
430                          tempStr.insert(pos, std::string("%f "));
431                          pos += 3;
432                          printCallArgs.push_back(
433                              FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
434                      }
435                      printCallArgs.push_back(
436                          FP_EXT(VEXTRACT(pArg, C(i)), Type::getDoubleTy(JM()->mContext)));
437                  }
438                  else if ((tempStr[pos + 1] == 'd') && (pContainedType->isIntegerTy()))
439                  {
440                      uint32_t i = 0;
441  #if LLVM_VERSION_MAJOR >= 11
442                      for (; i < pVectorType->getNumElements() - 1; i++)
443  #else
444                      for (; i < pType->getVectorNumElements() - 1; i++)
445  #endif
446                      {
447                          tempStr.insert(pos, std::string("%d "));
448                          pos += 3;
449                          printCallArgs.push_back(
450                              S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
451                      }
452                      printCallArgs.push_back(
453                          S_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
454                  }
455                  else if ((tempStr[pos + 1] == 'u') && (pContainedType->isIntegerTy()))
456                  {
457                      uint32_t i = 0;
458  #if LLVM_VERSION_MAJOR >= 11
459                      for (; i < pVectorType->getNumElements() - 1; i++)
460  #else
461                      for (; i < pType->getVectorNumElements() - 1; i++)
462  #endif
463                      {
464                          tempStr.insert(pos, std::string("%d "));
465                          pos += 3;
466                          printCallArgs.push_back(
467                              Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
468                      }
469                      printCallArgs.push_back(
470                          Z_EXT(VEXTRACT(pArg, C(i)), Type::getInt32Ty(JM()->mContext)));
471                  }
472              }
473              else
474              {
475                  if (toupper(tempStr[pos + 1]) == 'X')
476                  {
477                      tempStr[pos] = '0';
478                      tempStr.insert(pos + 1, "x%08");
479                      printCallArgs.push_back(pArg);
480                      pos += 3;
481                  }
482                  // for %f we need to cast float Values to doubles so that they print out correctly
483                  else if ((tempStr[pos + 1] == 'f') && (pType->isFloatTy()))
484                  {
485                      printCallArgs.push_back(FP_EXT(pArg, Type::getDoubleTy(JM()->mContext)));
486                      pos++;
487                  }
488                  else
489                  {
490                      printCallArgs.push_back(pArg);
491                  }
492              }
493  
494              // advance to the next arguement
495              v++;
496              pos = tempStr.find('%', ++pos);
497          }
498  
499          // create global variable constant string
500          Constant*       constString = ConstantDataArray::getString(JM()->mContext, tempStr, true);
501          GlobalVariable* gvPtr       = new GlobalVariable(
502              constString->getType(), true, GlobalValue::InternalLinkage, constString, "printStr");
503          JM()->mpCurrentModule->getGlobalList().push_back(gvPtr);
504  
505          // get a pointer to the first character in the constant string array
506          std::vector<Constant*> geplist{C(0), C(0)};
507          Constant* strGEP = ConstantExpr::getGetElementPtr(nullptr, gvPtr, geplist, false);
508  
509          // insert the pointer to the format string in the argument vector
510          printCallArgs[0] = strGEP;
511  
512          // get pointer to CallPrint function and insert decl into the module if needed
513          std::vector<Type*> args;
514          args.push_back(PointerType::get(mInt8Ty, 0));
515          FunctionType* callPrintTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, true);
516          Function*     callPrintFn =
517  #if LLVM_VERSION_MAJOR >= 9
518              cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy).getCallee());
519  #else
520              cast<Function>(JM()->mpCurrentModule->getOrInsertFunction("CallPrint", callPrintTy));
521  #endif
522  
523          // if we haven't yet added the symbol to the symbol table
524          if ((sys::DynamicLibrary::SearchForAddressOfSymbol("CallPrint")) == nullptr)
525          {
526              sys::DynamicLibrary::AddSymbol("CallPrint", (void*)&CallPrint);
527          }
528  
529          // insert a call to CallPrint
530          return CALLA(callPrintFn, printCallArgs);
531      }
532  
533      //////////////////////////////////////////////////////////////////////////
534      /// @brief Wrapper around PRINT with initializer list.
PRINT(const std::string & printStr)535      CallInst* Builder::PRINT(const std::string& printStr) { return PRINT(printStr, {}); }
536  
EXTRACT_16(Value * x,uint32_t imm)537      Value* Builder::EXTRACT_16(Value* x, uint32_t imm)
538      {
539          if (imm == 0)
540          {
541              return VSHUFFLE(x, UndefValue::get(x->getType()), {0, 1, 2, 3, 4, 5, 6, 7});
542          }
543          else
544          {
545              return VSHUFFLE(x, UndefValue::get(x->getType()), {8, 9, 10, 11, 12, 13, 14, 15});
546          }
547      }
548  
JOIN_16(Value * a,Value * b)549      Value* Builder::JOIN_16(Value* a, Value* b)
550      {
551          return VSHUFFLE(a, b, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
552      }
553  
554      //////////////////////////////////////////////////////////////////////////
555      /// @brief convert x86 <N x float> mask to llvm <N x i1> mask
MASK(Value * vmask)556      Value* Builder::MASK(Value* vmask)
557      {
558          Value* src = BITCAST(vmask, mSimdInt32Ty);
559          return ICMP_SLT(src, VIMMED1(0));
560      }
561  
MASK_16(Value * vmask)562      Value* Builder::MASK_16(Value* vmask)
563      {
564          Value* src = BITCAST(vmask, mSimd16Int32Ty);
565          return ICMP_SLT(src, VIMMED1_16(0));
566      }
567  
568      //////////////////////////////////////////////////////////////////////////
569      /// @brief convert llvm <N x i1> mask to x86 <N x i32> mask
VMASK(Value * mask)570      Value* Builder::VMASK(Value* mask) { return S_EXT(mask, mSimdInt32Ty); }
571  
VMASK_16(Value * mask)572      Value* Builder::VMASK_16(Value* mask) { return S_EXT(mask, mSimd16Int32Ty); }
573  
574      /// @brief Convert <Nxi1> llvm mask to integer
VMOVMSK(Value * mask)575      Value* Builder::VMOVMSK(Value* mask)
576      {
577  #if LLVM_VERSION_MAJOR >= 11
578          VectorType* pVectorType = cast<VectorType>(mask->getType());
579          SWR_ASSERT(pVectorType->getElementType() == mInt1Ty);
580          uint32_t numLanes = pVectorType->getNumElements();
581  #else
582          SWR_ASSERT(mask->getType()->getVectorElementType() == mInt1Ty);
583          uint32_t numLanes = mask->getType()->getVectorNumElements();
584  #endif
585          Value*   i32Result;
586          if (numLanes == 8)
587          {
588              i32Result = BITCAST(mask, mInt8Ty);
589          }
590          else if (numLanes == 16)
591          {
592              i32Result = BITCAST(mask, mInt16Ty);
593          }
594          else
595          {
596              SWR_ASSERT("Unsupported vector width");
597              i32Result = BITCAST(mask, mInt8Ty);
598          }
599          return Z_EXT(i32Result, mInt32Ty);
600      }
601  
602      //////////////////////////////////////////////////////////////////////////
603      /// @brief Generate a VPSHUFB operation in LLVM IR.  If not
604      /// supported on the underlying platform, emulate it
605      /// @param a - 256bit SIMD(32x8bit) of 8bit integer values
606      /// @param b - 256bit SIMD(32x8bit) of 8bit integer mask values
607      /// Byte masks in lower 128 lane of b selects 8 bit values from lower
608      /// 128bits of a, and vice versa for the upper lanes.  If the mask
609      /// value is negative, '0' is inserted.
PSHUFB(Value * a,Value * b)610      Value* Builder::PSHUFB(Value* a, Value* b)
611      {
612          Value* res;
613          // use avx2 pshufb instruction if available
614          if (JM()->mArch.AVX2())
615          {
616              res = VPSHUFB(a, b);
617          }
618          else
619          {
620              Constant* cB = dyn_cast<Constant>(b);
621              assert(cB != nullptr);
622              // number of 8 bit elements in b
623              uint32_t numElms = cast<VectorType>(cB->getType())->getNumElements();
624              // output vector
625              Value* vShuf = UndefValue::get(getVectorType(mInt8Ty, numElms));
626  
627              // insert an 8 bit value from the high and low lanes of a per loop iteration
628              numElms /= 2;
629              for (uint32_t i = 0; i < numElms; i++)
630              {
631                  ConstantInt* cLow128b  = cast<ConstantInt>(cB->getAggregateElement(i));
632                  ConstantInt* cHigh128b = cast<ConstantInt>(cB->getAggregateElement(i + numElms));
633  
634                  // extract values from constant mask
635                  char valLow128bLane  = (char)(cLow128b->getSExtValue());
636                  char valHigh128bLane = (char)(cHigh128b->getSExtValue());
637  
638                  Value* insertValLow128b;
639                  Value* insertValHigh128b;
640  
641                  // if the mask value is negative, insert a '0' in the respective output position
642                  // otherwise, lookup the value at mask position (bits 3..0 of the respective mask
643                  // byte) in a and insert in output vector
644                  insertValLow128b =
645                      (valLow128bLane < 0) ? C((char)0) : VEXTRACT(a, C((valLow128bLane & 0xF)));
646                  insertValHigh128b = (valHigh128bLane < 0)
647                                          ? C((char)0)
648                                          : VEXTRACT(a, C((valHigh128bLane & 0xF) + numElms));
649  
650                  vShuf = VINSERT(vShuf, insertValLow128b, i);
651                  vShuf = VINSERT(vShuf, insertValHigh128b, (i + numElms));
652              }
653              res = vShuf;
654          }
655          return res;
656      }
657  
658      //////////////////////////////////////////////////////////////////////////
659      /// @brief Generate a VPSHUFB operation (sign extend 8 8bit values to 32
660      /// bits)in LLVM IR.  If not supported on the underlying platform, emulate it
661      /// @param a - 128bit SIMD lane(16x8bit) of 8bit integer values.  Only
662      /// lower 8 values are used.
PMOVSXBD(Value * a)663      Value* Builder::PMOVSXBD(Value* a)
664      {
665          // VPMOVSXBD output type
666          Type* v8x32Ty = getVectorType(mInt32Ty, 8);
667          // Extract 8 values from 128bit lane and sign extend
668          return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
669      }
670  
671      //////////////////////////////////////////////////////////////////////////
672      /// @brief Generate a VPSHUFB operation (sign extend 8 16bit values to 32
673      /// bits)in LLVM IR.  If not supported on the underlying platform, emulate it
674      /// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
PMOVSXWD(Value * a)675      Value* Builder::PMOVSXWD(Value* a)
676      {
677          // VPMOVSXWD output type
678          Type* v8x32Ty = getVectorType(mInt32Ty, 8);
679          // Extract 8 values from 128bit lane and sign extend
680          return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
681      }
682  
683      //////////////////////////////////////////////////////////////////////////
684      /// @brief Generate a VCVTPH2PS operation (float16->float32 conversion)
685      /// in LLVM IR.  If not supported on the underlying platform, emulate it
686      /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
CVTPH2PS(Value * a,const llvm::Twine & name)687      Value* Builder::CVTPH2PS(Value* a, const llvm::Twine& name)
688      {
689          // Bitcast Nxint16 to Nxhalf
690  #if LLVM_VERSION_MAJOR >= 11
691          uint32_t numElems = cast<VectorType>(a->getType())->getNumElements();
692  #else
693          uint32_t numElems = a->getType()->getVectorNumElements();
694  #endif
695          Value*   input    = BITCAST(a, getVectorType(mFP16Ty, numElems));
696  
697          return FP_EXT(input, getVectorType(mFP32Ty, numElems), name);
698      }
699  
700      //////////////////////////////////////////////////////////////////////////
701      /// @brief Generate a VCVTPS2PH operation (float32->float16 conversion)
702      /// in LLVM IR.  If not supported on the underlying platform, emulate it
703      /// @param a - 128bit SIMD lane(8x16bit) of float16 in int16 format.
CVTPS2PH(Value * a,Value * rounding)704      Value* Builder::CVTPS2PH(Value* a, Value* rounding)
705      {
706          if (JM()->mArch.F16C())
707          {
708              return VCVTPS2PH(a, rounding);
709          }
710          else
711          {
712              // call scalar C function for now
713              FunctionType* pFuncTy   = FunctionType::get(mInt16Ty, mFP32Ty);
714              Function*     pCvtPs2Ph = cast<Function>(
715  #if LLVM_VERSION_MAJOR >= 9
716                  JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy).getCallee());
717  #else
718                  JM()->mpCurrentModule->getOrInsertFunction("ConvertFloat32ToFloat16", pFuncTy));
719  #endif
720  
721              if (sys::DynamicLibrary::SearchForAddressOfSymbol("ConvertFloat32ToFloat16") == nullptr)
722              {
723                  sys::DynamicLibrary::AddSymbol("ConvertFloat32ToFloat16",
724                                                 (void*)&ConvertFloat32ToFloat16);
725              }
726  
727              Value* pResult = UndefValue::get(mSimdInt16Ty);
728              for (uint32_t i = 0; i < mVWidth; ++i)
729              {
730                  Value* pSrc  = VEXTRACT(a, C(i));
731                  Value* pConv = CALL(pCvtPs2Ph, std::initializer_list<Value*>{pSrc});
732                  pResult      = VINSERT(pResult, pConv, C(i));
733              }
734  
735              return pResult;
736          }
737      }
738  
PMAXSD(Value * a,Value * b)739      Value* Builder::PMAXSD(Value* a, Value* b)
740      {
741          Value* cmp = ICMP_SGT(a, b);
742          return SELECT(cmp, a, b);
743      }
744  
PMINSD(Value * a,Value * b)745      Value* Builder::PMINSD(Value* a, Value* b)
746      {
747          Value* cmp = ICMP_SLT(a, b);
748          return SELECT(cmp, a, b);
749      }
750  
PMAXUD(Value * a,Value * b)751      Value* Builder::PMAXUD(Value* a, Value* b)
752      {
753          Value* cmp = ICMP_UGT(a, b);
754          return SELECT(cmp, a, b);
755      }
756  
PMINUD(Value * a,Value * b)757      Value* Builder::PMINUD(Value* a, Value* b)
758      {
759          Value* cmp = ICMP_ULT(a, b);
760          return SELECT(cmp, a, b);
761      }
762  
763      // Helper function to create alloca in entry block of function
CreateEntryAlloca(Function * pFunc,Type * pType)764      Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
765      {
766          auto saveIP = IRB()->saveIP();
767          IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
768          Value* pAlloca = ALLOCA(pType);
769          if (saveIP.isSet())
770              IRB()->restoreIP(saveIP);
771          return pAlloca;
772      }
773  
CreateEntryAlloca(Function * pFunc,Type * pType,Value * pArraySize)774      Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize)
775      {
776          auto saveIP = IRB()->saveIP();
777          IRB()->SetInsertPoint(&pFunc->getEntryBlock(), pFunc->getEntryBlock().begin());
778          Value* pAlloca = ALLOCA(pType, pArraySize);
779          if (saveIP.isSet())
780              IRB()->restoreIP(saveIP);
781          return pAlloca;
782      }
783  
VABSPS(Value * a)784      Value* Builder::VABSPS(Value* a)
785      {
786          Value* asInt  = BITCAST(a, mSimdInt32Ty);
787          Value* result = BITCAST(AND(asInt, VIMMED1(0x7fffffff)), mSimdFP32Ty);
788          return result;
789      }
790  
ICLAMP(Value * src,Value * low,Value * high,const llvm::Twine & name)791      Value* Builder::ICLAMP(Value* src, Value* low, Value* high, const llvm::Twine& name)
792      {
793          Value* lowCmp = ICMP_SLT(src, low);
794          Value* ret    = SELECT(lowCmp, low, src);
795  
796          Value* highCmp = ICMP_SGT(ret, high);
797          ret            = SELECT(highCmp, high, ret, name);
798  
799          return ret;
800      }
801  
FCLAMP(Value * src,Value * low,Value * high)802      Value* Builder::FCLAMP(Value* src, Value* low, Value* high)
803      {
804          Value* lowCmp = FCMP_OLT(src, low);
805          Value* ret    = SELECT(lowCmp, low, src);
806  
807          Value* highCmp = FCMP_OGT(ret, high);
808          ret            = SELECT(highCmp, high, ret);
809  
810          return ret;
811      }
812  
FCLAMP(Value * src,float low,float high)813      Value* Builder::FCLAMP(Value* src, float low, float high)
814      {
815          Value* result = VMAXPS(src, VIMMED1(low));
816          result        = VMINPS(result, VIMMED1(high));
817  
818          return result;
819      }
820  
FMADDPS(Value * a,Value * b,Value * c)821      Value* Builder::FMADDPS(Value* a, Value* b, Value* c)
822      {
823          Value* vOut;
824          // This maps to LLVM fmuladd intrinsic
825          vOut = VFMADDPS(a, b, c);
826          return vOut;
827      }
828  
829      //////////////////////////////////////////////////////////////////////////
830      /// @brief pop count on vector mask (e.g. <8 x i1>)
VPOPCNT(Value * a)831      Value* Builder::VPOPCNT(Value* a) { return POPCNT(VMOVMSK(a)); }
832  
833      //////////////////////////////////////////////////////////////////////////
834      /// @brief Float / Fixed-point conversions
835      //////////////////////////////////////////////////////////////////////////
VCVT_F32_FIXED_SI(Value * vFloat,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)836      Value* Builder::VCVT_F32_FIXED_SI(Value*             vFloat,
837                                        uint32_t           numIntBits,
838                                        uint32_t           numFracBits,
839                                        const llvm::Twine& name)
840      {
841          SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
842          Value* fixed = nullptr;
843  
844  #if 0   // This doesn't work for negative numbers!!
845          {
846              fixed = FP_TO_SI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
847                                      C(_MM_FROUND_TO_NEAREST_INT)),
848                               mSimdInt32Ty);
849          }
850          else
851  #endif
852          {
853              // Do round to nearest int on fractional bits first
854              // Not entirely perfect for negative numbers, but close enough
855              vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
856                              C(_MM_FROUND_TO_NEAREST_INT));
857              vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
858  
859              // TODO: Handle INF, NAN, overflow / underflow, etc.
860  
861              Value* vSgn      = FCMP_OLT(vFloat, VIMMED1(0.0f));
862              Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
863              Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
864              vFixed           = OR(vFixed, VIMMED1(1 << 23));
865              vFixed           = SELECT(vSgn, NEG(vFixed), vFixed);
866  
867              Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
868              vExp        = SUB(vExp, VIMMED1(127));
869  
870              Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
871  
872              fixed = ASHR(vFixed, vExtraBits, name);
873          }
874  
875          return fixed;
876      }
877  
VCVT_FIXED_SI_F32(Value * vFixed,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)878      Value* Builder::VCVT_FIXED_SI_F32(Value*             vFixed,
879                                        uint32_t           numIntBits,
880                                        uint32_t           numFracBits,
881                                        const llvm::Twine& name)
882      {
883          SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
884          uint32_t extraBits = 32 - numIntBits - numFracBits;
885          if (numIntBits && extraBits)
886          {
887              // Sign extend
888              Value* shftAmt = VIMMED1(extraBits);
889              vFixed         = ASHR(SHL(vFixed, shftAmt), shftAmt);
890          }
891  
892          Value* fVal  = VIMMED1(0.0f);
893          Value* fFrac = VIMMED1(0.0f);
894          if (numIntBits)
895          {
896              fVal = SI_TO_FP(ASHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
897          }
898  
899          if (numFracBits)
900          {
901              fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
902              fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
903          }
904  
905          return FADD(fVal, fFrac, name);
906      }
907  
VCVT_F32_FIXED_UI(Value * vFloat,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)908      Value* Builder::VCVT_F32_FIXED_UI(Value*             vFloat,
909                                        uint32_t           numIntBits,
910                                        uint32_t           numFracBits,
911                                        const llvm::Twine& name)
912      {
913          SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
914          Value* fixed = nullptr;
915  #if 1   // KNOB_SIM_FAST_MATH?  Below works correctly from a precision
916          // standpoint...
917          {
918              fixed = FP_TO_UI(VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
919                                      C(_MM_FROUND_TO_NEAREST_INT)),
920                               mSimdInt32Ty);
921          }
922  #else
923          {
924              // Do round to nearest int on fractional bits first
925              vFloat = VROUND(FMUL(vFloat, VIMMED1(float(1 << numFracBits))),
926                              C(_MM_FROUND_TO_NEAREST_INT));
927              vFloat = FMUL(vFloat, VIMMED1(1.0f / float(1 << numFracBits)));
928  
929              // TODO: Handle INF, NAN, overflow / underflow, etc.
930  
931              Value* vSgn      = FCMP_OLT(vFloat, VIMMED1(0.0f));
932              Value* vFloatInt = BITCAST(vFloat, mSimdInt32Ty);
933              Value* vFixed    = AND(vFloatInt, VIMMED1((1 << 23) - 1));
934              vFixed           = OR(vFixed, VIMMED1(1 << 23));
935  
936              Value* vExp = LSHR(SHL(vFloatInt, VIMMED1(1)), VIMMED1(24));
937              vExp        = SUB(vExp, VIMMED1(127));
938  
939              Value* vExtraBits = SUB(VIMMED1(23 - numFracBits), vExp);
940  
941              fixed = LSHR(vFixed, vExtraBits, name);
942          }
943  #endif
944          return fixed;
945      }
946  
VCVT_FIXED_UI_F32(Value * vFixed,uint32_t numIntBits,uint32_t numFracBits,const llvm::Twine & name)947      Value* Builder::VCVT_FIXED_UI_F32(Value*             vFixed,
948                                        uint32_t           numIntBits,
949                                        uint32_t           numFracBits,
950                                        const llvm::Twine& name)
951      {
952          SWR_ASSERT((numIntBits + numFracBits) <= 32, "Can only handle 32-bit fixed-point values");
953          uint32_t extraBits = 32 - numIntBits - numFracBits;
954          if (numIntBits && extraBits)
955          {
956              // Sign extend
957              Value* shftAmt = VIMMED1(extraBits);
958              vFixed         = ASHR(SHL(vFixed, shftAmt), shftAmt);
959          }
960  
961          Value* fVal  = VIMMED1(0.0f);
962          Value* fFrac = VIMMED1(0.0f);
963          if (numIntBits)
964          {
965              fVal = UI_TO_FP(LSHR(vFixed, VIMMED1(numFracBits)), mSimdFP32Ty, name);
966          }
967  
968          if (numFracBits)
969          {
970              fFrac = UI_TO_FP(AND(vFixed, VIMMED1((1 << numFracBits) - 1)), mSimdFP32Ty);
971              fFrac = FDIV(fFrac, VIMMED1(float(1 << numFracBits)), name);
972          }
973  
974          return FADD(fVal, fFrac, name);
975      }
976  
977      //////////////////////////////////////////////////////////////////////////
978      /// @brief C functions called by LLVM IR
979      //////////////////////////////////////////////////////////////////////////
980  
VEXTRACTI128(Value * a,Constant * imm8)981      Value* Builder::VEXTRACTI128(Value* a, Constant* imm8)
982      {
983          bool                      flag = !imm8->isZeroValue();
984          SmallVector<Constant*, 8> idx;
985          for (unsigned i = 0; i < mVWidth / 2; i++)
986          {
987              idx.push_back(C(flag ? i + mVWidth / 2 : i));
988          }
989          return VSHUFFLE(a, VUNDEF_I(), ConstantVector::get(idx));
990      }
991  
VINSERTI128(Value * a,Value * b,Constant * imm8)992      Value* Builder::VINSERTI128(Value* a, Value* b, Constant* imm8)
993      {
994          bool                      flag = !imm8->isZeroValue();
995          SmallVector<Constant*, 8> idx;
996          for (unsigned i = 0; i < mVWidth; i++)
997          {
998              idx.push_back(C(i));
999          }
1000          Value* inter = VSHUFFLE(b, VUNDEF_I(), ConstantVector::get(idx));
1001  
1002          SmallVector<Constant*, 8> idx2;
1003          for (unsigned i = 0; i < mVWidth / 2; i++)
1004          {
1005              idx2.push_back(C(flag ? i : i + mVWidth));
1006          }
1007          for (unsigned i = mVWidth / 2; i < mVWidth; i++)
1008          {
1009              idx2.push_back(C(flag ? i + mVWidth / 2 : i));
1010          }
1011          return VSHUFFLE(a, inter, ConstantVector::get(idx2));
1012      }
1013  
1014      // rdtsc buckets macros
RDTSC_START(Value * pBucketMgr,Value * pId)1015      void Builder::RDTSC_START(Value* pBucketMgr, Value* pId)
1016      {
1017          // @todo due to an issue with thread local storage propagation in llvm, we can only safely
1018          // call into buckets framework when single threaded
1019          if (KNOB_SINGLE_THREADED)
1020          {
1021              std::vector<Type*> args{
1022                  PointerType::get(mInt32Ty, 0), // pBucketMgr
1023                  mInt32Ty                       // id
1024              };
1025  
1026              FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
1027              Function*     pFunc   = cast<Function>(
1028  #if LLVM_VERSION_MAJOR >= 9
1029                  JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy).getCallee());
1030  #else
1031                  JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StartBucket", pFuncTy));
1032  #endif
1033              if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StartBucket") ==
1034                  nullptr)
1035              {
1036                  sys::DynamicLibrary::AddSymbol("BucketManager_StartBucket",
1037                                                 (void*)&BucketManager_StartBucket);
1038              }
1039  
1040              CALL(pFunc, {pBucketMgr, pId});
1041          }
1042      }
1043  
RDTSC_STOP(Value * pBucketMgr,Value * pId)1044      void Builder::RDTSC_STOP(Value* pBucketMgr, Value* pId)
1045      {
1046          // @todo due to an issue with thread local storage propagation in llvm, we can only safely
1047          // call into buckets framework when single threaded
1048          if (KNOB_SINGLE_THREADED)
1049          {
1050              std::vector<Type*> args{
1051                  PointerType::get(mInt32Ty, 0), // pBucketMgr
1052                  mInt32Ty                       // id
1053              };
1054  
1055              FunctionType* pFuncTy = FunctionType::get(Type::getVoidTy(JM()->mContext), args, false);
1056              Function*     pFunc   = cast<Function>(
1057  #if LLVM_VERSION_MAJOR >= 9
1058                  JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy).getCallee());
1059  #else
1060                  JM()->mpCurrentModule->getOrInsertFunction("BucketManager_StopBucket", pFuncTy));
1061  #endif
1062              if (sys::DynamicLibrary::SearchForAddressOfSymbol("BucketManager_StopBucket") ==
1063                  nullptr)
1064              {
1065                  sys::DynamicLibrary::AddSymbol("BucketManager_StopBucket",
1066                                                 (void*)&BucketManager_StopBucket);
1067              }
1068  
1069              CALL(pFunc, {pBucketMgr, pId});
1070          }
1071      }
1072  
GetTypeSize(Type * pType)1073      uint32_t Builder::GetTypeSize(Type* pType)
1074      {
1075          if (pType->isStructTy())
1076          {
1077              uint32_t numElems = pType->getStructNumElements();
1078              Type*    pElemTy  = pType->getStructElementType(0);
1079              return numElems * GetTypeSize(pElemTy);
1080          }
1081  
1082          if (pType->isArrayTy())
1083          {
1084              uint32_t numElems = pType->getArrayNumElements();
1085              Type*    pElemTy  = pType->getArrayElementType();
1086              return numElems * GetTypeSize(pElemTy);
1087          }
1088  
1089          if (pType->isIntegerTy())
1090          {
1091              uint32_t bitSize = pType->getIntegerBitWidth();
1092              return bitSize / 8;
1093          }
1094  
1095          if (pType->isFloatTy())
1096          {
1097              return 4;
1098          }
1099  
1100          if (pType->isHalfTy())
1101          {
1102              return 2;
1103          }
1104  
1105          if (pType->isDoubleTy())
1106          {
1107              return 8;
1108          }
1109  
1110          SWR_ASSERT(false, "Unimplemented type.");
1111          return 0;
1112      }
1113  } // namespace SwrJit
1114