1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CodeGenFunction.h"
15 #include "CGCXXABI.h"
16 #include "CGObjCRuntime.h"
17 #include "CodeGenModule.h"
18 #include "TargetInfo.h"
19 #include "clang/AST/ASTContext.h"
20 #include "clang/AST/Decl.h"
21 #include "clang/Basic/TargetBuiltins.h"
22 #include "clang/Basic/TargetInfo.h"
23 #include "clang/CodeGen/CGFunctionInfo.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/IR/CallSite.h"
26 #include "llvm/IR/DataLayout.h"
27 #include "llvm/IR/InlineAsm.h"
28 #include "llvm/IR/Intrinsics.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include <sstream>
31 
32 using namespace clang;
33 using namespace CodeGen;
34 using namespace llvm;
35 
36 /// getBuiltinLibFunction - Given a builtin id for a function like
37 /// "__builtin_fabsf", return a Function* for "fabsf".
getBuiltinLibFunction(const FunctionDecl * FD,unsigned BuiltinID)38 llvm::Value *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
39                                                   unsigned BuiltinID) {
40   assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
41 
42   // Get the name, skip over the __builtin_ prefix (if necessary).
43   StringRef Name;
44   GlobalDecl D(FD);
45 
46   // If the builtin has been declared explicitly with an assembler label,
47   // use the mangled name. This differs from the plain label on platforms
48   // that prefix labels.
49   if (FD->hasAttr<AsmLabelAttr>())
50     Name = getMangledName(D);
51   else
52     Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
53 
54   llvm::FunctionType *Ty =
55     cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
56 
57   return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
58 }
59 
60 /// Emit the conversions required to turn the given value into an
61 /// integer of the given size.
EmitToInt(CodeGenFunction & CGF,llvm::Value * V,QualType T,llvm::IntegerType * IntType)62 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
63                         QualType T, llvm::IntegerType *IntType) {
64   V = CGF.EmitToMemory(V, T);
65 
66   if (V->getType()->isPointerTy())
67     return CGF.Builder.CreatePtrToInt(V, IntType);
68 
69   assert(V->getType() == IntType);
70   return V;
71 }
72 
EmitFromInt(CodeGenFunction & CGF,llvm::Value * V,QualType T,llvm::Type * ResultType)73 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
74                           QualType T, llvm::Type *ResultType) {
75   V = CGF.EmitFromMemory(V, T);
76 
77   if (ResultType->isPointerTy())
78     return CGF.Builder.CreateIntToPtr(V, ResultType);
79 
80   assert(V->getType() == ResultType);
81   return V;
82 }
83 
84 /// Utility to insert an atomic instruction based on Instrinsic::ID
85 /// and the expression node.
MakeBinaryAtomicValue(CodeGenFunction & CGF,llvm::AtomicRMWInst::BinOp Kind,const CallExpr * E)86 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
87                                     llvm::AtomicRMWInst::BinOp Kind,
88                                     const CallExpr *E) {
89   QualType T = E->getType();
90   assert(E->getArg(0)->getType()->isPointerType());
91   assert(CGF.getContext().hasSameUnqualifiedType(T,
92                                   E->getArg(0)->getType()->getPointeeType()));
93   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
94 
95   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
96   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
97 
98   llvm::IntegerType *IntType =
99     llvm::IntegerType::get(CGF.getLLVMContext(),
100                            CGF.getContext().getTypeSize(T));
101   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
102 
103   llvm::Value *Args[2];
104   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
105   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
106   llvm::Type *ValueType = Args[1]->getType();
107   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
108 
109   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
110       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
111   return EmitFromInt(CGF, Result, T, ValueType);
112 }
113 
EmitNontemporalStore(CodeGenFunction & CGF,const CallExpr * E)114 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
115   Value *Val = CGF.EmitScalarExpr(E->getArg(0));
116   Value *Address = CGF.EmitScalarExpr(E->getArg(1));
117 
118   // Convert the type of the pointer to a pointer to the stored type.
119   Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
120   Value *BC = CGF.Builder.CreateBitCast(
121       Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
122   LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
123   LV.setNontemporal(true);
124   CGF.EmitStoreOfScalar(Val, LV, false);
125   return nullptr;
126 }
127 
EmitNontemporalLoad(CodeGenFunction & CGF,const CallExpr * E)128 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
129   Value *Address = CGF.EmitScalarExpr(E->getArg(0));
130 
131   LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
132   LV.setNontemporal(true);
133   return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
134 }
135 
EmitBinaryAtomic(CodeGenFunction & CGF,llvm::AtomicRMWInst::BinOp Kind,const CallExpr * E)136 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
137                                llvm::AtomicRMWInst::BinOp Kind,
138                                const CallExpr *E) {
139   return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
140 }
141 
142 /// Utility to insert an atomic instruction based Instrinsic::ID and
143 /// the expression node, where the return value is the result of the
144 /// operation.
EmitBinaryAtomicPost(CodeGenFunction & CGF,llvm::AtomicRMWInst::BinOp Kind,const CallExpr * E,Instruction::BinaryOps Op,bool Invert=false)145 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
146                                    llvm::AtomicRMWInst::BinOp Kind,
147                                    const CallExpr *E,
148                                    Instruction::BinaryOps Op,
149                                    bool Invert = false) {
150   QualType T = E->getType();
151   assert(E->getArg(0)->getType()->isPointerType());
152   assert(CGF.getContext().hasSameUnqualifiedType(T,
153                                   E->getArg(0)->getType()->getPointeeType()));
154   assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
155 
156   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
157   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
158 
159   llvm::IntegerType *IntType =
160     llvm::IntegerType::get(CGF.getLLVMContext(),
161                            CGF.getContext().getTypeSize(T));
162   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
163 
164   llvm::Value *Args[2];
165   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
166   llvm::Type *ValueType = Args[1]->getType();
167   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
168   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
169 
170   llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
171       Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
172   Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
173   if (Invert)
174     Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
175                                      llvm::ConstantInt::get(IntType, -1));
176   Result = EmitFromInt(CGF, Result, T, ValueType);
177   return RValue::get(Result);
178 }
179 
180 /// @brief Utility to insert an atomic cmpxchg instruction.
181 ///
182 /// @param CGF The current codegen function.
183 /// @param E   Builtin call expression to convert to cmpxchg.
184 ///            arg0 - address to operate on
185 ///            arg1 - value to compare with
186 ///            arg2 - new value
187 /// @param ReturnBool Specifies whether to return success flag of
188 ///                   cmpxchg result or the old value.
189 ///
190 /// @returns result of cmpxchg, according to ReturnBool
MakeAtomicCmpXchgValue(CodeGenFunction & CGF,const CallExpr * E,bool ReturnBool)191 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
192                                      bool ReturnBool) {
193   QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
194   llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
195   unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
196 
197   llvm::IntegerType *IntType = llvm::IntegerType::get(
198       CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
199   llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
200 
201   Value *Args[3];
202   Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
203   Args[1] = CGF.EmitScalarExpr(E->getArg(1));
204   llvm::Type *ValueType = Args[1]->getType();
205   Args[1] = EmitToInt(CGF, Args[1], T, IntType);
206   Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
207 
208   Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
209       Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
210       llvm::AtomicOrdering::SequentiallyConsistent);
211   if (ReturnBool)
212     // Extract boolean success flag and zext it to int.
213     return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
214                                   CGF.ConvertType(E->getType()));
215   else
216     // Extract old value and emit it using the same type as compare value.
217     return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
218                        ValueType);
219 }
220 
221 // Emit a simple mangled intrinsic that has 1 argument and a return type
222 // matching the argument type.
emitUnaryBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)223 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
224                                const CallExpr *E,
225                                unsigned IntrinsicID) {
226   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
227 
228   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
229   return CGF.Builder.CreateCall(F, Src0);
230 }
231 
232 // Emit an intrinsic that has 2 operands of the same type as its result.
emitBinaryBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)233 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
234                                 const CallExpr *E,
235                                 unsigned IntrinsicID) {
236   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
237   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
238 
239   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
240   return CGF.Builder.CreateCall(F, { Src0, Src1 });
241 }
242 
243 // Emit an intrinsic that has 3 operands of the same type as its result.
emitTernaryBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)244 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
245                                  const CallExpr *E,
246                                  unsigned IntrinsicID) {
247   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
248   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
249   llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
250 
251   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
252   return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
253 }
254 
255 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
emitFPIntBuiltin(CodeGenFunction & CGF,const CallExpr * E,unsigned IntrinsicID)256 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
257                                const CallExpr *E,
258                                unsigned IntrinsicID) {
259   llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
260   llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
261 
262   Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
263   return CGF.Builder.CreateCall(F, {Src0, Src1});
264 }
265 
266 /// EmitFAbs - Emit a call to @llvm.fabs().
EmitFAbs(CodeGenFunction & CGF,Value * V)267 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
268   Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
269   llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
270   Call->setDoesNotAccessMemory();
271   return Call;
272 }
273 
274 /// Emit the computation of the sign bit for a floating point value. Returns
275 /// the i1 sign bit value.
EmitSignBit(CodeGenFunction & CGF,Value * V)276 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
277   LLVMContext &C = CGF.CGM.getLLVMContext();
278 
279   llvm::Type *Ty = V->getType();
280   int Width = Ty->getPrimitiveSizeInBits();
281   llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
282   V = CGF.Builder.CreateBitCast(V, IntTy);
283   if (Ty->isPPC_FP128Ty()) {
284     // We want the sign bit of the higher-order double. The bitcast we just
285     // did works as if the double-double was stored to memory and then
286     // read as an i128. The "store" will put the higher-order double in the
287     // lower address in both little- and big-Endian modes, but the "load"
288     // will treat those bits as a different part of the i128: the low bits in
289     // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
290     // we need to shift the high bits down to the low before truncating.
291     Width >>= 1;
292     if (CGF.getTarget().isBigEndian()) {
293       Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
294       V = CGF.Builder.CreateLShr(V, ShiftCst);
295     }
296     // We are truncating value in order to extract the higher-order
297     // double, which we will be using to extract the sign from.
298     IntTy = llvm::IntegerType::get(C, Width);
299     V = CGF.Builder.CreateTrunc(V, IntTy);
300   }
301   Value *Zero = llvm::Constant::getNullValue(IntTy);
302   return CGF.Builder.CreateICmpSLT(V, Zero);
303 }
304 
emitLibraryCall(CodeGenFunction & CGF,const FunctionDecl * Fn,const CallExpr * E,llvm::Value * calleeValue)305 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *Fn,
306                               const CallExpr *E, llvm::Value *calleeValue) {
307   return CGF.EmitCall(E->getCallee()->getType(), calleeValue, E,
308                       ReturnValueSlot(), Fn);
309 }
310 
311 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
312 /// depending on IntrinsicID.
313 ///
314 /// \arg CGF The current codegen function.
315 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
316 /// \arg X The first argument to the llvm.*.with.overflow.*.
317 /// \arg Y The second argument to the llvm.*.with.overflow.*.
318 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
319 /// \returns The result (i.e. sum/product) returned by the intrinsic.
EmitOverflowIntrinsic(CodeGenFunction & CGF,const llvm::Intrinsic::ID IntrinsicID,llvm::Value * X,llvm::Value * Y,llvm::Value * & Carry)320 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
321                                           const llvm::Intrinsic::ID IntrinsicID,
322                                           llvm::Value *X, llvm::Value *Y,
323                                           llvm::Value *&Carry) {
324   // Make sure we have integers of the same width.
325   assert(X->getType() == Y->getType() &&
326          "Arguments must be the same type. (Did you forget to make sure both "
327          "arguments have the same integer width?)");
328 
329   llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
330   llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
331   Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
332   return CGF.Builder.CreateExtractValue(Tmp, 0);
333 }
334 
emitRangedBuiltin(CodeGenFunction & CGF,unsigned IntrinsicID,int low,int high)335 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
336                                 unsigned IntrinsicID,
337                                 int low, int high) {
338     llvm::MDBuilder MDHelper(CGF.getLLVMContext());
339     llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
340     Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
341     llvm::Instruction *Call = CGF.Builder.CreateCall(F);
342     Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
343     return Call;
344 }
345 
namespace {
/// Describes an integer type by its width and its signedness.
struct WidthAndSignedness {
  /// Width of the integer type in bits.
  unsigned Width;
  /// True when the integer type is signed.
  bool Signed;
};
} // end anonymous namespace
352 
353 static WidthAndSignedness
getIntegerWidthAndSignedness(const clang::ASTContext & context,const clang::QualType Type)354 getIntegerWidthAndSignedness(const clang::ASTContext &context,
355                              const clang::QualType Type) {
356   assert(Type->isIntegerType() && "Given type is not an integer.");
357   unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
358   bool Signed = Type->isSignedIntegerType();
359   return {Width, Signed};
360 }
361 
362 // Given one or more integer types, this function produces an integer type that
363 // encompasses them: any value in one of the given types could be expressed in
364 // the encompassing type.
365 static struct WidthAndSignedness
EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types)366 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
367   assert(Types.size() > 0 && "Empty list of types.");
368 
369   // If any of the given types is signed, we must return a signed type.
370   bool Signed = false;
371   for (const auto &Type : Types) {
372     Signed |= Type.Signed;
373   }
374 
375   // The encompassing type must have a width greater than or equal to the width
376   // of the specified types.  Aditionally, if the encompassing type is signed,
377   // its width must be strictly greater than the width of any unsigned types
378   // given.
379   unsigned Width = 0;
380   for (const auto &Type : Types) {
381     unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
382     if (Width < MinWidth) {
383       Width = MinWidth;
384     }
385   }
386 
387   return {Width, Signed};
388 }
389 
EmitVAStartEnd(Value * ArgValue,bool IsStart)390 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
391   llvm::Type *DestType = Int8PtrTy;
392   if (ArgValue->getType() != DestType)
393     ArgValue =
394         Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
395 
396   Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
397   return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
398 }
399 
/// Checks if using the result of __builtin_object_size(p, @p From) in place of
/// __builtin_object_size(p, @p To) is correct
static bool areBOSTypesCompatible(int From, int To) {
  // Identical types are always interchangeable.
  if (From == To)
    return true;

  // Note: Our __builtin_object_size implementation currently treats Type=0 and
  // Type=2 identically. Encoding this implementation detail here may make
  // improving __builtin_object_size difficult in the future, so it's omitted.
  switch (From) {
  case 0:
    return To == 1;
  case 3:
    return To == 2;
  default:
    return false;
  }
}
408 
409 static llvm::Value *
getDefaultBuiltinObjectSizeResult(unsigned Type,llvm::IntegerType * ResType)410 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
411   return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
412 }
413 
414 llvm::Value *
evaluateOrEmitBuiltinObjectSize(const Expr * E,unsigned Type,llvm::IntegerType * ResType)415 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
416                                                  llvm::IntegerType *ResType) {
417   uint64_t ObjectSize;
418   if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
419     return emitBuiltinObjectSize(E, Type, ResType);
420   return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
421 }
422 
423 /// Returns a Value corresponding to the size of the given expression.
424 /// This Value may be either of the following:
425 ///   - A llvm::Argument (if E is a param with the pass_object_size attribute on
426 ///     it)
427 ///   - A call to the @llvm.objectsize intrinsic
428 llvm::Value *
emitBuiltinObjectSize(const Expr * E,unsigned Type,llvm::IntegerType * ResType)429 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
430                                        llvm::IntegerType *ResType) {
431   // We need to reference an argument if the pointer is a parameter with the
432   // pass_object_size attribute.
433   if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
434     auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
435     auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
436     if (Param != nullptr && PS != nullptr &&
437         areBOSTypesCompatible(PS->getType(), Type)) {
438       auto Iter = SizeArguments.find(Param);
439       assert(Iter != SizeArguments.end());
440 
441       const ImplicitParamDecl *D = Iter->second;
442       auto DIter = LocalDeclMap.find(D);
443       assert(DIter != LocalDeclMap.end());
444 
445       return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
446                               getContext().getSizeType(), E->getLocStart());
447     }
448   }
449 
450   // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
451   // evaluate E for side-effects. In either case, we shouldn't lower to
452   // @llvm.objectsize.
453   if (Type == 3 || E->HasSideEffects(getContext()))
454     return getDefaultBuiltinObjectSizeResult(Type, ResType);
455 
456   // LLVM only supports 0 and 2, make sure that we pass along that
457   // as a boolean.
458   auto *CI = ConstantInt::get(Builder.getInt1Ty(), (Type & 2) >> 1);
459   // FIXME: Get right address space.
460   llvm::Type *Tys[] = {ResType, Builder.getInt8PtrTy(0)};
461   Value *F = CGM.getIntrinsic(Intrinsic::objectsize, Tys);
462   return Builder.CreateCall(F, {EmitScalarExpr(E), CI});
463 }
464 
EmitBuiltinExpr(const FunctionDecl * FD,unsigned BuiltinID,const CallExpr * E,ReturnValueSlot ReturnValue)465 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
466                                         unsigned BuiltinID, const CallExpr *E,
467                                         ReturnValueSlot ReturnValue) {
468   // See if we can constant fold this builtin.  If so, don't emit it at all.
469   Expr::EvalResult Result;
470   if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
471       !Result.hasSideEffects()) {
472     if (Result.Val.isInt())
473       return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
474                                                 Result.Val.getInt()));
475     if (Result.Val.isFloat())
476       return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
477                                                Result.Val.getFloat()));
478   }
479 
480   switch (BuiltinID) {
481   default: break;  // Handle intrinsics and libm functions below.
482   case Builtin::BI__builtin___CFStringMakeConstantString:
483   case Builtin::BI__builtin___NSStringMakeConstantString:
484     return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
485   case Builtin::BI__builtin_stdarg_start:
486   case Builtin::BI__builtin_va_start:
487   case Builtin::BI__va_start:
488   case Builtin::BI__builtin_va_end:
489     return RValue::get(
490         EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
491                            ? EmitScalarExpr(E->getArg(0))
492                            : EmitVAListRef(E->getArg(0)).getPointer(),
493                        BuiltinID != Builtin::BI__builtin_va_end));
494   case Builtin::BI__builtin_va_copy: {
495     Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
496     Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
497 
498     llvm::Type *Type = Int8PtrTy;
499 
500     DstPtr = Builder.CreateBitCast(DstPtr, Type);
501     SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
502     return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
503                                           {DstPtr, SrcPtr}));
504   }
505   case Builtin::BI__builtin_abs:
506   case Builtin::BI__builtin_labs:
507   case Builtin::BI__builtin_llabs: {
508     Value *ArgValue = EmitScalarExpr(E->getArg(0));
509 
510     Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
511     Value *CmpResult =
512     Builder.CreateICmpSGE(ArgValue,
513                           llvm::Constant::getNullValue(ArgValue->getType()),
514                                                             "abscond");
515     Value *Result =
516       Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
517 
518     return RValue::get(Result);
519   }
520   case Builtin::BI__builtin_fabs:
521   case Builtin::BI__builtin_fabsf:
522   case Builtin::BI__builtin_fabsl: {
523     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
524   }
525   case Builtin::BI__builtin_fmod:
526   case Builtin::BI__builtin_fmodf:
527   case Builtin::BI__builtin_fmodl: {
528     Value *Arg1 = EmitScalarExpr(E->getArg(0));
529     Value *Arg2 = EmitScalarExpr(E->getArg(1));
530     Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
531     return RValue::get(Result);
532   }
533   case Builtin::BI__builtin_copysign:
534   case Builtin::BI__builtin_copysignf:
535   case Builtin::BI__builtin_copysignl: {
536     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
537   }
538   case Builtin::BI__builtin_ceil:
539   case Builtin::BI__builtin_ceilf:
540   case Builtin::BI__builtin_ceill: {
541     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
542   }
543   case Builtin::BI__builtin_floor:
544   case Builtin::BI__builtin_floorf:
545   case Builtin::BI__builtin_floorl: {
546     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
547   }
548   case Builtin::BI__builtin_trunc:
549   case Builtin::BI__builtin_truncf:
550   case Builtin::BI__builtin_truncl: {
551     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
552   }
553   case Builtin::BI__builtin_rint:
554   case Builtin::BI__builtin_rintf:
555   case Builtin::BI__builtin_rintl: {
556     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
557   }
558   case Builtin::BI__builtin_nearbyint:
559   case Builtin::BI__builtin_nearbyintf:
560   case Builtin::BI__builtin_nearbyintl: {
561     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
562   }
563   case Builtin::BI__builtin_round:
564   case Builtin::BI__builtin_roundf:
565   case Builtin::BI__builtin_roundl: {
566     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
567   }
568   case Builtin::BI__builtin_fmin:
569   case Builtin::BI__builtin_fminf:
570   case Builtin::BI__builtin_fminl: {
571     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
572   }
573   case Builtin::BI__builtin_fmax:
574   case Builtin::BI__builtin_fmaxf:
575   case Builtin::BI__builtin_fmaxl: {
576     return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
577   }
578   case Builtin::BI__builtin_conj:
579   case Builtin::BI__builtin_conjf:
580   case Builtin::BI__builtin_conjl: {
581     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
582     Value *Real = ComplexVal.first;
583     Value *Imag = ComplexVal.second;
584     Value *Zero =
585       Imag->getType()->isFPOrFPVectorTy()
586         ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
587         : llvm::Constant::getNullValue(Imag->getType());
588 
589     Imag = Builder.CreateFSub(Zero, Imag, "sub");
590     return RValue::getComplex(std::make_pair(Real, Imag));
591   }
592   case Builtin::BI__builtin_creal:
593   case Builtin::BI__builtin_crealf:
594   case Builtin::BI__builtin_creall:
595   case Builtin::BIcreal:
596   case Builtin::BIcrealf:
597   case Builtin::BIcreall: {
598     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
599     return RValue::get(ComplexVal.first);
600   }
601 
602   case Builtin::BI__builtin_cimag:
603   case Builtin::BI__builtin_cimagf:
604   case Builtin::BI__builtin_cimagl:
605   case Builtin::BIcimag:
606   case Builtin::BIcimagf:
607   case Builtin::BIcimagl: {
608     ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
609     return RValue::get(ComplexVal.second);
610   }
611 
612   case Builtin::BI__builtin_ctzs:
613   case Builtin::BI__builtin_ctz:
614   case Builtin::BI__builtin_ctzl:
615   case Builtin::BI__builtin_ctzll: {
616     Value *ArgValue = EmitScalarExpr(E->getArg(0));
617 
618     llvm::Type *ArgType = ArgValue->getType();
619     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
620 
621     llvm::Type *ResultType = ConvertType(E->getType());
622     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
623     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
624     if (Result->getType() != ResultType)
625       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
626                                      "cast");
627     return RValue::get(Result);
628   }
629   case Builtin::BI__builtin_clzs:
630   case Builtin::BI__builtin_clz:
631   case Builtin::BI__builtin_clzl:
632   case Builtin::BI__builtin_clzll: {
633     Value *ArgValue = EmitScalarExpr(E->getArg(0));
634 
635     llvm::Type *ArgType = ArgValue->getType();
636     Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
637 
638     llvm::Type *ResultType = ConvertType(E->getType());
639     Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
640     Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
641     if (Result->getType() != ResultType)
642       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
643                                      "cast");
644     return RValue::get(Result);
645   }
646   case Builtin::BI__builtin_ffs:
647   case Builtin::BI__builtin_ffsl:
648   case Builtin::BI__builtin_ffsll: {
649     // ffs(x) -> x ? cttz(x) + 1 : 0
650     Value *ArgValue = EmitScalarExpr(E->getArg(0));
651 
652     llvm::Type *ArgType = ArgValue->getType();
653     Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
654 
655     llvm::Type *ResultType = ConvertType(E->getType());
656     Value *Tmp =
657         Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
658                           llvm::ConstantInt::get(ArgType, 1));
659     Value *Zero = llvm::Constant::getNullValue(ArgType);
660     Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
661     Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
662     if (Result->getType() != ResultType)
663       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
664                                      "cast");
665     return RValue::get(Result);
666   }
667   case Builtin::BI__builtin_parity:
668   case Builtin::BI__builtin_parityl:
669   case Builtin::BI__builtin_parityll: {
670     // parity(x) -> ctpop(x) & 1
671     Value *ArgValue = EmitScalarExpr(E->getArg(0));
672 
673     llvm::Type *ArgType = ArgValue->getType();
674     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
675 
676     llvm::Type *ResultType = ConvertType(E->getType());
677     Value *Tmp = Builder.CreateCall(F, ArgValue);
678     Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
679     if (Result->getType() != ResultType)
680       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
681                                      "cast");
682     return RValue::get(Result);
683   }
684   case Builtin::BI__builtin_popcount:
685   case Builtin::BI__builtin_popcountl:
686   case Builtin::BI__builtin_popcountll: {
687     Value *ArgValue = EmitScalarExpr(E->getArg(0));
688 
689     llvm::Type *ArgType = ArgValue->getType();
690     Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
691 
692     llvm::Type *ResultType = ConvertType(E->getType());
693     Value *Result = Builder.CreateCall(F, ArgValue);
694     if (Result->getType() != ResultType)
695       Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
696                                      "cast");
697     return RValue::get(Result);
698   }
699   case Builtin::BI__builtin_unpredictable: {
700     // Always return the argument of __builtin_unpredictable. LLVM does not
701     // handle this builtin. Metadata for this builtin should be added directly
702     // to instructions such as branches or switches that use it.
703     return RValue::get(EmitScalarExpr(E->getArg(0)));
704   }
705   case Builtin::BI__builtin_expect: {
706     Value *ArgValue = EmitScalarExpr(E->getArg(0));
707     llvm::Type *ArgType = ArgValue->getType();
708 
709     Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
710     // Don't generate llvm.expect on -O0 as the backend won't use it for
711     // anything.
712     // Note, we still IRGen ExpectedValue because it could have side-effects.
713     if (CGM.getCodeGenOpts().OptimizationLevel == 0)
714       return RValue::get(ArgValue);
715 
716     Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
717     Value *Result =
718         Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
719     return RValue::get(Result);
720   }
721   case Builtin::BI__builtin_assume_aligned: {
722     Value *PtrValue = EmitScalarExpr(E->getArg(0));
723     Value *OffsetValue =
724       (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
725 
726     Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
727     ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
728     unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
729 
730     EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
731     return RValue::get(PtrValue);
732   }
733   case Builtin::BI__assume:
734   case Builtin::BI__builtin_assume: {
735     if (E->getArg(0)->HasSideEffects(getContext()))
736       return RValue::get(nullptr);
737 
738     Value *ArgValue = EmitScalarExpr(E->getArg(0));
739     Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
740     return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
741   }
742   case Builtin::BI__builtin_bswap16:
743   case Builtin::BI__builtin_bswap32:
744   case Builtin::BI__builtin_bswap64: {
745     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
746   }
747   case Builtin::BI__builtin_bitreverse8:
748   case Builtin::BI__builtin_bitreverse16:
749   case Builtin::BI__builtin_bitreverse32:
750   case Builtin::BI__builtin_bitreverse64: {
751     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
752   }
753   case Builtin::BI__builtin_object_size: {
754     unsigned Type =
755         E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
756     auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
757 
758     // We pass this builtin onto the optimizer so that it can figure out the
759     // object size in more complex cases.
760     return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType));
761   }
762   case Builtin::BI__builtin_prefetch: {
763     Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
764     // FIXME: Technically these constants should of type 'int', yes?
765     RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
766       llvm::ConstantInt::get(Int32Ty, 0);
767     Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
768       llvm::ConstantInt::get(Int32Ty, 3);
769     Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
770     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
771     return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
772   }
773   case Builtin::BI__builtin_readcyclecounter: {
774     Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
775     return RValue::get(Builder.CreateCall(F));
776   }
777   case Builtin::BI__builtin___clear_cache: {
778     Value *Begin = EmitScalarExpr(E->getArg(0));
779     Value *End = EmitScalarExpr(E->getArg(1));
780     Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
781     return RValue::get(Builder.CreateCall(F, {Begin, End}));
782   }
783   case Builtin::BI__builtin_trap:
784     return RValue::get(EmitTrapCall(Intrinsic::trap));
785   case Builtin::BI__debugbreak:
786     return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
787   case Builtin::BI__builtin_unreachable: {
788     if (SanOpts.has(SanitizerKind::Unreachable)) {
789       SanitizerScope SanScope(this);
790       EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
791                                SanitizerKind::Unreachable),
792                 "builtin_unreachable", EmitCheckSourceLocation(E->getExprLoc()),
793                 None);
794     } else
795       Builder.CreateUnreachable();
796 
797     // We do need to preserve an insertion point.
798     EmitBlock(createBasicBlock("unreachable.cont"));
799 
800     return RValue::get(nullptr);
801   }
802 
803   case Builtin::BI__builtin_powi:
804   case Builtin::BI__builtin_powif:
805   case Builtin::BI__builtin_powil: {
806     Value *Base = EmitScalarExpr(E->getArg(0));
807     Value *Exponent = EmitScalarExpr(E->getArg(1));
808     llvm::Type *ArgType = Base->getType();
809     Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
810     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
811   }
812 
813   case Builtin::BI__builtin_isgreater:
814   case Builtin::BI__builtin_isgreaterequal:
815   case Builtin::BI__builtin_isless:
816   case Builtin::BI__builtin_islessequal:
817   case Builtin::BI__builtin_islessgreater:
818   case Builtin::BI__builtin_isunordered: {
819     // Ordered comparisons: we know the arguments to these are matching scalar
820     // floating point values.
821     Value *LHS = EmitScalarExpr(E->getArg(0));
822     Value *RHS = EmitScalarExpr(E->getArg(1));
823 
824     switch (BuiltinID) {
825     default: llvm_unreachable("Unknown ordered comparison");
826     case Builtin::BI__builtin_isgreater:
827       LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
828       break;
829     case Builtin::BI__builtin_isgreaterequal:
830       LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
831       break;
832     case Builtin::BI__builtin_isless:
833       LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
834       break;
835     case Builtin::BI__builtin_islessequal:
836       LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
837       break;
838     case Builtin::BI__builtin_islessgreater:
839       LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
840       break;
841     case Builtin::BI__builtin_isunordered:
842       LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
843       break;
844     }
845     // ZExt bool to int type.
846     return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
847   }
848   case Builtin::BI__builtin_isnan: {
849     Value *V = EmitScalarExpr(E->getArg(0));
850     V = Builder.CreateFCmpUNO(V, V, "cmp");
851     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
852   }
853 
854   case Builtin::BI__builtin_isinf:
855   case Builtin::BI__builtin_isfinite: {
856     // isinf(x)    --> fabs(x) == infinity
857     // isfinite(x) --> fabs(x) != infinity
858     // x != NaN via the ordered compare in either case.
859     Value *V = EmitScalarExpr(E->getArg(0));
860     Value *Fabs = EmitFAbs(*this, V);
861     Constant *Infinity = ConstantFP::getInfinity(V->getType());
862     CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
863                                   ? CmpInst::FCMP_OEQ
864                                   : CmpInst::FCMP_ONE;
865     Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
866     return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
867   }
868 
869   case Builtin::BI__builtin_isinf_sign: {
870     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
871     Value *Arg = EmitScalarExpr(E->getArg(0));
872     Value *AbsArg = EmitFAbs(*this, Arg);
873     Value *IsInf = Builder.CreateFCmpOEQ(
874         AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
875     Value *IsNeg = EmitSignBit(*this, Arg);
876 
877     llvm::Type *IntTy = ConvertType(E->getType());
878     Value *Zero = Constant::getNullValue(IntTy);
879     Value *One = ConstantInt::get(IntTy, 1);
880     Value *NegativeOne = ConstantInt::get(IntTy, -1);
881     Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
882     Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
883     return RValue::get(Result);
884   }
885 
886   case Builtin::BI__builtin_isnormal: {
887     // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
888     Value *V = EmitScalarExpr(E->getArg(0));
889     Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
890 
891     Value *Abs = EmitFAbs(*this, V);
892     Value *IsLessThanInf =
893       Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
894     APFloat Smallest = APFloat::getSmallestNormalized(
895                    getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
896     Value *IsNormal =
897       Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
898                             "isnormal");
899     V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
900     V = Builder.CreateAnd(V, IsNormal, "and");
901     return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
902   }
903 
904   case Builtin::BI__builtin_fpclassify: {
905     Value *V = EmitScalarExpr(E->getArg(5));
906     llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
907 
908     // Create Result
909     BasicBlock *Begin = Builder.GetInsertBlock();
910     BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
911     Builder.SetInsertPoint(End);
912     PHINode *Result =
913       Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
914                         "fpclassify_result");
915 
916     // if (V==0) return FP_ZERO
917     Builder.SetInsertPoint(Begin);
918     Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
919                                           "iszero");
920     Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
921     BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
922     Builder.CreateCondBr(IsZero, End, NotZero);
923     Result->addIncoming(ZeroLiteral, Begin);
924 
925     // if (V != V) return FP_NAN
926     Builder.SetInsertPoint(NotZero);
927     Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
928     Value *NanLiteral = EmitScalarExpr(E->getArg(0));
929     BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
930     Builder.CreateCondBr(IsNan, End, NotNan);
931     Result->addIncoming(NanLiteral, NotZero);
932 
933     // if (fabs(V) == infinity) return FP_INFINITY
934     Builder.SetInsertPoint(NotNan);
935     Value *VAbs = EmitFAbs(*this, V);
936     Value *IsInf =
937       Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
938                             "isinf");
939     Value *InfLiteral = EmitScalarExpr(E->getArg(1));
940     BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
941     Builder.CreateCondBr(IsInf, End, NotInf);
942     Result->addIncoming(InfLiteral, NotNan);
943 
944     // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
945     Builder.SetInsertPoint(NotInf);
946     APFloat Smallest = APFloat::getSmallestNormalized(
947         getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
948     Value *IsNormal =
949       Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
950                             "isnormal");
951     Value *NormalResult =
952       Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
953                            EmitScalarExpr(E->getArg(3)));
954     Builder.CreateBr(End);
955     Result->addIncoming(NormalResult, NotInf);
956 
957     // return Result
958     Builder.SetInsertPoint(End);
959     return RValue::get(Result);
960   }
961 
962   case Builtin::BIalloca:
963   case Builtin::BI_alloca:
964   case Builtin::BI__builtin_alloca: {
965     Value *Size = EmitScalarExpr(E->getArg(0));
966     return RValue::get(Builder.CreateAlloca(Builder.getInt8Ty(), Size));
967   }
968   case Builtin::BIbzero:
969   case Builtin::BI__builtin_bzero: {
970     Address Dest = EmitPointerWithAlignment(E->getArg(0));
971     Value *SizeVal = EmitScalarExpr(E->getArg(1));
972     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
973                         E->getArg(0)->getExprLoc(), FD, 0);
974     Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
975     return RValue::get(Dest.getPointer());
976   }
977   case Builtin::BImemcpy:
978   case Builtin::BI__builtin_memcpy: {
979     Address Dest = EmitPointerWithAlignment(E->getArg(0));
980     Address Src = EmitPointerWithAlignment(E->getArg(1));
981     Value *SizeVal = EmitScalarExpr(E->getArg(2));
982     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
983                         E->getArg(0)->getExprLoc(), FD, 0);
984     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
985                         E->getArg(1)->getExprLoc(), FD, 1);
986     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
987     return RValue::get(Dest.getPointer());
988   }
989 
990   case Builtin::BI__builtin___memcpy_chk: {
991     // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
992     llvm::APSInt Size, DstSize;
993     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
994         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
995       break;
996     if (Size.ugt(DstSize))
997       break;
998     Address Dest = EmitPointerWithAlignment(E->getArg(0));
999     Address Src = EmitPointerWithAlignment(E->getArg(1));
1000     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1001     Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1002     return RValue::get(Dest.getPointer());
1003   }
1004 
1005   case Builtin::BI__builtin_objc_memmove_collectable: {
1006     Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1007     Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1008     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1009     CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1010                                                   DestAddr, SrcAddr, SizeVal);
1011     return RValue::get(DestAddr.getPointer());
1012   }
1013 
1014   case Builtin::BI__builtin___memmove_chk: {
1015     // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1016     llvm::APSInt Size, DstSize;
1017     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1018         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1019       break;
1020     if (Size.ugt(DstSize))
1021       break;
1022     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1023     Address Src = EmitPointerWithAlignment(E->getArg(1));
1024     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1025     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1026     return RValue::get(Dest.getPointer());
1027   }
1028 
1029   case Builtin::BImemmove:
1030   case Builtin::BI__builtin_memmove: {
1031     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1032     Address Src = EmitPointerWithAlignment(E->getArg(1));
1033     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1034     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1035                         E->getArg(0)->getExprLoc(), FD, 0);
1036     EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1037                         E->getArg(1)->getExprLoc(), FD, 1);
1038     Builder.CreateMemMove(Dest, Src, SizeVal, false);
1039     return RValue::get(Dest.getPointer());
1040   }
1041   case Builtin::BImemset:
1042   case Builtin::BI__builtin_memset: {
1043     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1044     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1045                                          Builder.getInt8Ty());
1046     Value *SizeVal = EmitScalarExpr(E->getArg(2));
1047     EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1048                         E->getArg(0)->getExprLoc(), FD, 0);
1049     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1050     return RValue::get(Dest.getPointer());
1051   }
1052   case Builtin::BI__builtin___memset_chk: {
1053     // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1054     llvm::APSInt Size, DstSize;
1055     if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1056         !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1057       break;
1058     if (Size.ugt(DstSize))
1059       break;
1060     Address Dest = EmitPointerWithAlignment(E->getArg(0));
1061     Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1062                                          Builder.getInt8Ty());
1063     Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1064     Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1065     return RValue::get(Dest.getPointer());
1066   }
1067   case Builtin::BI__builtin_dwarf_cfa: {
1068     // The offset in bytes from the first argument to the CFA.
1069     //
1070     // Why on earth is this in the frontend?  Is there any reason at
1071     // all that the backend can't reasonably determine this while
1072     // lowering llvm.eh.dwarf.cfa()?
1073     //
1074     // TODO: If there's a satisfactory reason, add a target hook for
1075     // this instead of hard-coding 0, which is correct for most targets.
1076     int32_t Offset = 0;
1077 
1078     Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1079     return RValue::get(Builder.CreateCall(F,
1080                                       llvm::ConstantInt::get(Int32Ty, Offset)));
1081   }
1082   case Builtin::BI__builtin_return_address: {
1083     Value *Depth =
1084         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1085     Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1086     return RValue::get(Builder.CreateCall(F, Depth));
1087   }
1088   case Builtin::BI__builtin_frame_address: {
1089     Value *Depth =
1090         CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1091     Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1092     return RValue::get(Builder.CreateCall(F, Depth));
1093   }
1094   case Builtin::BI__builtin_extract_return_addr: {
1095     Value *Address = EmitScalarExpr(E->getArg(0));
1096     Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1097     return RValue::get(Result);
1098   }
1099   case Builtin::BI__builtin_frob_return_addr: {
1100     Value *Address = EmitScalarExpr(E->getArg(0));
1101     Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1102     return RValue::get(Result);
1103   }
1104   case Builtin::BI__builtin_dwarf_sp_column: {
1105     llvm::IntegerType *Ty
1106       = cast<llvm::IntegerType>(ConvertType(E->getType()));
1107     int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1108     if (Column == -1) {
1109       CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1110       return RValue::get(llvm::UndefValue::get(Ty));
1111     }
1112     return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1113   }
1114   case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1115     Value *Address = EmitScalarExpr(E->getArg(0));
1116     if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1117       CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1118     return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1119   }
1120   case Builtin::BI__builtin_eh_return: {
1121     Value *Int = EmitScalarExpr(E->getArg(0));
1122     Value *Ptr = EmitScalarExpr(E->getArg(1));
1123 
1124     llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1125     assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1126            "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1127     Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1128                                   ? Intrinsic::eh_return_i32
1129                                   : Intrinsic::eh_return_i64);
1130     Builder.CreateCall(F, {Int, Ptr});
1131     Builder.CreateUnreachable();
1132 
1133     // We do need to preserve an insertion point.
1134     EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1135 
1136     return RValue::get(nullptr);
1137   }
1138   case Builtin::BI__builtin_unwind_init: {
1139     Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1140     return RValue::get(Builder.CreateCall(F));
1141   }
1142   case Builtin::BI__builtin_extend_pointer: {
1143     // Extends a pointer to the size of an _Unwind_Word, which is
1144     // uint64_t on all platforms.  Generally this gets poked into a
1145     // register and eventually used as an address, so if the
1146     // addressing registers are wider than pointers and the platform
1147     // doesn't implicitly ignore high-order bits when doing
1148     // addressing, we need to make sure we zext / sext based on
1149     // the platform's expectations.
1150     //
1151     // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1152 
1153     // Cast the pointer to intptr_t.
1154     Value *Ptr = EmitScalarExpr(E->getArg(0));
1155     Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1156 
1157     // If that's 64 bits, we're done.
1158     if (IntPtrTy->getBitWidth() == 64)
1159       return RValue::get(Result);
1160 
1161     // Otherwise, ask the codegen data what to do.
1162     if (getTargetHooks().extendPointerWithSExt())
1163       return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1164     else
1165       return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1166   }
1167   case Builtin::BI__builtin_setjmp: {
1168     // Buffer is a void**.
1169     Address Buf = EmitPointerWithAlignment(E->getArg(0));
1170 
1171     // Store the frame pointer to the setjmp buffer.
1172     Value *FrameAddr =
1173       Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1174                          ConstantInt::get(Int32Ty, 0));
1175     Builder.CreateStore(FrameAddr, Buf);
1176 
1177     // Store the stack pointer to the setjmp buffer.
1178     Value *StackAddr =
1179         Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1180     Address StackSaveSlot =
1181       Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1182     Builder.CreateStore(StackAddr, StackSaveSlot);
1183 
1184     // Call LLVM's EH setjmp, which is lightweight.
1185     Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1186     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1187     return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1188   }
1189   case Builtin::BI__builtin_longjmp: {
1190     Value *Buf = EmitScalarExpr(E->getArg(0));
1191     Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1192 
1193     // Call LLVM's EH longjmp, which is lightweight.
1194     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1195 
1196     // longjmp doesn't return; mark this as unreachable.
1197     Builder.CreateUnreachable();
1198 
1199     // We do need to preserve an insertion point.
1200     EmitBlock(createBasicBlock("longjmp.cont"));
1201 
1202     return RValue::get(nullptr);
1203   }
1204   case Builtin::BI__sync_fetch_and_add:
1205   case Builtin::BI__sync_fetch_and_sub:
1206   case Builtin::BI__sync_fetch_and_or:
1207   case Builtin::BI__sync_fetch_and_and:
1208   case Builtin::BI__sync_fetch_and_xor:
1209   case Builtin::BI__sync_fetch_and_nand:
1210   case Builtin::BI__sync_add_and_fetch:
1211   case Builtin::BI__sync_sub_and_fetch:
1212   case Builtin::BI__sync_and_and_fetch:
1213   case Builtin::BI__sync_or_and_fetch:
1214   case Builtin::BI__sync_xor_and_fetch:
1215   case Builtin::BI__sync_nand_and_fetch:
1216   case Builtin::BI__sync_val_compare_and_swap:
1217   case Builtin::BI__sync_bool_compare_and_swap:
1218   case Builtin::BI__sync_lock_test_and_set:
1219   case Builtin::BI__sync_lock_release:
1220   case Builtin::BI__sync_swap:
1221     llvm_unreachable("Shouldn't make it through sema");
1222   case Builtin::BI__sync_fetch_and_add_1:
1223   case Builtin::BI__sync_fetch_and_add_2:
1224   case Builtin::BI__sync_fetch_and_add_4:
1225   case Builtin::BI__sync_fetch_and_add_8:
1226   case Builtin::BI__sync_fetch_and_add_16:
1227     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
1228   case Builtin::BI__sync_fetch_and_sub_1:
1229   case Builtin::BI__sync_fetch_and_sub_2:
1230   case Builtin::BI__sync_fetch_and_sub_4:
1231   case Builtin::BI__sync_fetch_and_sub_8:
1232   case Builtin::BI__sync_fetch_and_sub_16:
1233     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1234   case Builtin::BI__sync_fetch_and_or_1:
1235   case Builtin::BI__sync_fetch_and_or_2:
1236   case Builtin::BI__sync_fetch_and_or_4:
1237   case Builtin::BI__sync_fetch_and_or_8:
1238   case Builtin::BI__sync_fetch_and_or_16:
1239     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1240   case Builtin::BI__sync_fetch_and_and_1:
1241   case Builtin::BI__sync_fetch_and_and_2:
1242   case Builtin::BI__sync_fetch_and_and_4:
1243   case Builtin::BI__sync_fetch_and_and_8:
1244   case Builtin::BI__sync_fetch_and_and_16:
1245     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1246   case Builtin::BI__sync_fetch_and_xor_1:
1247   case Builtin::BI__sync_fetch_and_xor_2:
1248   case Builtin::BI__sync_fetch_and_xor_4:
1249   case Builtin::BI__sync_fetch_and_xor_8:
1250   case Builtin::BI__sync_fetch_and_xor_16:
1251     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1252   case Builtin::BI__sync_fetch_and_nand_1:
1253   case Builtin::BI__sync_fetch_and_nand_2:
1254   case Builtin::BI__sync_fetch_and_nand_4:
1255   case Builtin::BI__sync_fetch_and_nand_8:
1256   case Builtin::BI__sync_fetch_and_nand_16:
1257     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1258 
1259   // Clang extensions: not overloaded yet.
1260   case Builtin::BI__sync_fetch_and_min:
1261     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1262   case Builtin::BI__sync_fetch_and_max:
1263     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1264   case Builtin::BI__sync_fetch_and_umin:
1265     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1266   case Builtin::BI__sync_fetch_and_umax:
1267     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1268 
1269   case Builtin::BI__sync_add_and_fetch_1:
1270   case Builtin::BI__sync_add_and_fetch_2:
1271   case Builtin::BI__sync_add_and_fetch_4:
1272   case Builtin::BI__sync_add_and_fetch_8:
1273   case Builtin::BI__sync_add_and_fetch_16:
1274     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1275                                 llvm::Instruction::Add);
1276   case Builtin::BI__sync_sub_and_fetch_1:
1277   case Builtin::BI__sync_sub_and_fetch_2:
1278   case Builtin::BI__sync_sub_and_fetch_4:
1279   case Builtin::BI__sync_sub_and_fetch_8:
1280   case Builtin::BI__sync_sub_and_fetch_16:
1281     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1282                                 llvm::Instruction::Sub);
1283   case Builtin::BI__sync_and_and_fetch_1:
1284   case Builtin::BI__sync_and_and_fetch_2:
1285   case Builtin::BI__sync_and_and_fetch_4:
1286   case Builtin::BI__sync_and_and_fetch_8:
1287   case Builtin::BI__sync_and_and_fetch_16:
1288     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1289                                 llvm::Instruction::And);
1290   case Builtin::BI__sync_or_and_fetch_1:
1291   case Builtin::BI__sync_or_and_fetch_2:
1292   case Builtin::BI__sync_or_and_fetch_4:
1293   case Builtin::BI__sync_or_and_fetch_8:
1294   case Builtin::BI__sync_or_and_fetch_16:
1295     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1296                                 llvm::Instruction::Or);
1297   case Builtin::BI__sync_xor_and_fetch_1:
1298   case Builtin::BI__sync_xor_and_fetch_2:
1299   case Builtin::BI__sync_xor_and_fetch_4:
1300   case Builtin::BI__sync_xor_and_fetch_8:
1301   case Builtin::BI__sync_xor_and_fetch_16:
1302     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1303                                 llvm::Instruction::Xor);
1304   case Builtin::BI__sync_nand_and_fetch_1:
1305   case Builtin::BI__sync_nand_and_fetch_2:
1306   case Builtin::BI__sync_nand_and_fetch_4:
1307   case Builtin::BI__sync_nand_and_fetch_8:
1308   case Builtin::BI__sync_nand_and_fetch_16:
1309     return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1310                                 llvm::Instruction::And, true);
1311 
1312   case Builtin::BI__sync_val_compare_and_swap_1:
1313   case Builtin::BI__sync_val_compare_and_swap_2:
1314   case Builtin::BI__sync_val_compare_and_swap_4:
1315   case Builtin::BI__sync_val_compare_and_swap_8:
1316   case Builtin::BI__sync_val_compare_and_swap_16:
1317     return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1318 
1319   case Builtin::BI__sync_bool_compare_and_swap_1:
1320   case Builtin::BI__sync_bool_compare_and_swap_2:
1321   case Builtin::BI__sync_bool_compare_and_swap_4:
1322   case Builtin::BI__sync_bool_compare_and_swap_8:
1323   case Builtin::BI__sync_bool_compare_and_swap_16:
1324     return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
1325 
1326   case Builtin::BI__sync_swap_1:
1327   case Builtin::BI__sync_swap_2:
1328   case Builtin::BI__sync_swap_4:
1329   case Builtin::BI__sync_swap_8:
1330   case Builtin::BI__sync_swap_16:
1331     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1332 
1333   case Builtin::BI__sync_lock_test_and_set_1:
1334   case Builtin::BI__sync_lock_test_and_set_2:
1335   case Builtin::BI__sync_lock_test_and_set_4:
1336   case Builtin::BI__sync_lock_test_and_set_8:
1337   case Builtin::BI__sync_lock_test_and_set_16:
1338     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1339 
1340   case Builtin::BI__sync_lock_release_1:
1341   case Builtin::BI__sync_lock_release_2:
1342   case Builtin::BI__sync_lock_release_4:
1343   case Builtin::BI__sync_lock_release_8:
1344   case Builtin::BI__sync_lock_release_16: {
1345     Value *Ptr = EmitScalarExpr(E->getArg(0));
1346     QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1347     CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1348     llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1349                                              StoreSize.getQuantity() * 8);
1350     Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1351     llvm::StoreInst *Store =
1352       Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1353                                  StoreSize);
1354     Store->setAtomic(llvm::AtomicOrdering::Release);
1355     return RValue::get(nullptr);
1356   }
1357 
1358   case Builtin::BI__sync_synchronize: {
1359     // We assume this is supposed to correspond to a C++0x-style
1360     // sequentially-consistent fence (i.e. this is only usable for
1361     // synchronization, not device I/O or anything like that). This intrinsic
1362     // is really badly designed in the sense that in theory, there isn't
1363     // any way to safely use it... but in practice, it mostly works
1364     // to use it with non-atomic loads and stores to get acquire/release
1365     // semantics.
1366     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1367     return RValue::get(nullptr);
1368   }
1369 
1370   case Builtin::BI__builtin_nontemporal_load:
1371     return RValue::get(EmitNontemporalLoad(*this, E));
1372   case Builtin::BI__builtin_nontemporal_store:
1373     return RValue::get(EmitNontemporalStore(*this, E));
1374   case Builtin::BI__c11_atomic_is_lock_free:
1375   case Builtin::BI__atomic_is_lock_free: {
1376     // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1377     // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1378     // _Atomic(T) is always properly-aligned.
1379     const char *LibCallName = "__atomic_is_lock_free";
1380     CallArgList Args;
1381     Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1382              getContext().getSizeType());
1383     if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1384       Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1385                getContext().VoidPtrTy);
1386     else
1387       Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1388                getContext().VoidPtrTy);
1389     const CGFunctionInfo &FuncInfo =
1390         CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1391     llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1392     llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1393     return EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
1394   }
1395 
1396   case Builtin::BI__atomic_test_and_set: {
1397     // Look at the argument type to determine whether this is a volatile
1398     // operation. The parameter type is always volatile.
1399     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1400     bool Volatile =
1401         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1402 
1403     Value *Ptr = EmitScalarExpr(E->getArg(0));
1404     unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1405     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1406     Value *NewVal = Builder.getInt8(1);
1407     Value *Order = EmitScalarExpr(E->getArg(1));
1408     if (isa<llvm::ConstantInt>(Order)) {
1409       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1410       AtomicRMWInst *Result = nullptr;
1411       switch (ord) {
1412       case 0:  // memory_order_relaxed
1413       default: // invalid order
1414         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1415                                          llvm::AtomicOrdering::Monotonic);
1416         break;
1417       case 1: // memory_order_consume
1418       case 2: // memory_order_acquire
1419         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1420                                          llvm::AtomicOrdering::Acquire);
1421         break;
1422       case 3: // memory_order_release
1423         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1424                                          llvm::AtomicOrdering::Release);
1425         break;
1426       case 4: // memory_order_acq_rel
1427 
1428         Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1429                                          llvm::AtomicOrdering::AcquireRelease);
1430         break;
1431       case 5: // memory_order_seq_cst
1432         Result = Builder.CreateAtomicRMW(
1433             llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1434             llvm::AtomicOrdering::SequentiallyConsistent);
1435         break;
1436       }
1437       Result->setVolatile(Volatile);
1438       return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1439     }
1440 
1441     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1442 
1443     llvm::BasicBlock *BBs[5] = {
1444       createBasicBlock("monotonic", CurFn),
1445       createBasicBlock("acquire", CurFn),
1446       createBasicBlock("release", CurFn),
1447       createBasicBlock("acqrel", CurFn),
1448       createBasicBlock("seqcst", CurFn)
1449     };
1450     llvm::AtomicOrdering Orders[5] = {
1451         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1452         llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1453         llvm::AtomicOrdering::SequentiallyConsistent};
1454 
1455     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1456     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1457 
1458     Builder.SetInsertPoint(ContBB);
1459     PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1460 
1461     for (unsigned i = 0; i < 5; ++i) {
1462       Builder.SetInsertPoint(BBs[i]);
1463       AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1464                                                    Ptr, NewVal, Orders[i]);
1465       RMW->setVolatile(Volatile);
1466       Result->addIncoming(RMW, BBs[i]);
1467       Builder.CreateBr(ContBB);
1468     }
1469 
1470     SI->addCase(Builder.getInt32(0), BBs[0]);
1471     SI->addCase(Builder.getInt32(1), BBs[1]);
1472     SI->addCase(Builder.getInt32(2), BBs[1]);
1473     SI->addCase(Builder.getInt32(3), BBs[2]);
1474     SI->addCase(Builder.getInt32(4), BBs[3]);
1475     SI->addCase(Builder.getInt32(5), BBs[4]);
1476 
1477     Builder.SetInsertPoint(ContBB);
1478     return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1479   }
1480 
1481   case Builtin::BI__atomic_clear: {
1482     QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1483     bool Volatile =
1484         PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1485 
1486     Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1487     unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1488     Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1489     Value *NewVal = Builder.getInt8(0);
1490     Value *Order = EmitScalarExpr(E->getArg(1));
1491     if (isa<llvm::ConstantInt>(Order)) {
1492       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1493       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1494       switch (ord) {
1495       case 0:  // memory_order_relaxed
1496       default: // invalid order
1497         Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1498         break;
1499       case 3:  // memory_order_release
1500         Store->setOrdering(llvm::AtomicOrdering::Release);
1501         break;
1502       case 5:  // memory_order_seq_cst
1503         Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1504         break;
1505       }
1506       return RValue::get(nullptr);
1507     }
1508 
1509     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1510 
1511     llvm::BasicBlock *BBs[3] = {
1512       createBasicBlock("monotonic", CurFn),
1513       createBasicBlock("release", CurFn),
1514       createBasicBlock("seqcst", CurFn)
1515     };
1516     llvm::AtomicOrdering Orders[3] = {
1517         llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1518         llvm::AtomicOrdering::SequentiallyConsistent};
1519 
1520     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1521     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1522 
1523     for (unsigned i = 0; i < 3; ++i) {
1524       Builder.SetInsertPoint(BBs[i]);
1525       StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1526       Store->setOrdering(Orders[i]);
1527       Builder.CreateBr(ContBB);
1528     }
1529 
1530     SI->addCase(Builder.getInt32(0), BBs[0]);
1531     SI->addCase(Builder.getInt32(3), BBs[1]);
1532     SI->addCase(Builder.getInt32(5), BBs[2]);
1533 
1534     Builder.SetInsertPoint(ContBB);
1535     return RValue::get(nullptr);
1536   }
1537 
1538   case Builtin::BI__atomic_thread_fence:
1539   case Builtin::BI__atomic_signal_fence:
1540   case Builtin::BI__c11_atomic_thread_fence:
1541   case Builtin::BI__c11_atomic_signal_fence: {
1542     llvm::SynchronizationScope Scope;
1543     if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1544         BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1545       Scope = llvm::SingleThread;
1546     else
1547       Scope = llvm::CrossThread;
1548     Value *Order = EmitScalarExpr(E->getArg(0));
1549     if (isa<llvm::ConstantInt>(Order)) {
1550       int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1551       switch (ord) {
1552       case 0:  // memory_order_relaxed
1553       default: // invalid order
1554         break;
1555       case 1:  // memory_order_consume
1556       case 2:  // memory_order_acquire
1557         Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1558         break;
1559       case 3:  // memory_order_release
1560         Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1561         break;
1562       case 4:  // memory_order_acq_rel
1563         Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1564         break;
1565       case 5:  // memory_order_seq_cst
1566         Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
1567                             Scope);
1568         break;
1569       }
1570       return RValue::get(nullptr);
1571     }
1572 
1573     llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1574     AcquireBB = createBasicBlock("acquire", CurFn);
1575     ReleaseBB = createBasicBlock("release", CurFn);
1576     AcqRelBB = createBasicBlock("acqrel", CurFn);
1577     SeqCstBB = createBasicBlock("seqcst", CurFn);
1578     llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1579 
1580     Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1581     llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1582 
1583     Builder.SetInsertPoint(AcquireBB);
1584     Builder.CreateFence(llvm::AtomicOrdering::Acquire, Scope);
1585     Builder.CreateBr(ContBB);
1586     SI->addCase(Builder.getInt32(1), AcquireBB);
1587     SI->addCase(Builder.getInt32(2), AcquireBB);
1588 
1589     Builder.SetInsertPoint(ReleaseBB);
1590     Builder.CreateFence(llvm::AtomicOrdering::Release, Scope);
1591     Builder.CreateBr(ContBB);
1592     SI->addCase(Builder.getInt32(3), ReleaseBB);
1593 
1594     Builder.SetInsertPoint(AcqRelBB);
1595     Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, Scope);
1596     Builder.CreateBr(ContBB);
1597     SI->addCase(Builder.getInt32(4), AcqRelBB);
1598 
1599     Builder.SetInsertPoint(SeqCstBB);
1600     Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, Scope);
1601     Builder.CreateBr(ContBB);
1602     SI->addCase(Builder.getInt32(5), SeqCstBB);
1603 
1604     Builder.SetInsertPoint(ContBB);
1605     return RValue::get(nullptr);
1606   }
1607 
1608     // Library functions with special handling.
1609   case Builtin::BIsqrt:
1610   case Builtin::BIsqrtf:
1611   case Builtin::BIsqrtl: {
1612     // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1613     // in finite- or unsafe-math mode (the intrinsic has different semantics
1614     // for handling negative numbers compared to the library function, so
1615     // -fmath-errno=0 is not enough).
1616     if (!FD->hasAttr<ConstAttr>())
1617       break;
1618     if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1619           CGM.getCodeGenOpts().NoNaNsFPMath))
1620       break;
1621     Value *Arg0 = EmitScalarExpr(E->getArg(0));
1622     llvm::Type *ArgType = Arg0->getType();
1623     Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1624     return RValue::get(Builder.CreateCall(F, Arg0));
1625   }
1626 
1627   case Builtin::BI__builtin_pow:
1628   case Builtin::BI__builtin_powf:
1629   case Builtin::BI__builtin_powl:
1630   case Builtin::BIpow:
1631   case Builtin::BIpowf:
1632   case Builtin::BIpowl: {
1633     // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1634     if (!FD->hasAttr<ConstAttr>())
1635       break;
1636     Value *Base = EmitScalarExpr(E->getArg(0));
1637     Value *Exponent = EmitScalarExpr(E->getArg(1));
1638     llvm::Type *ArgType = Base->getType();
1639     Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1640     return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1641   }
1642 
1643   case Builtin::BIfma:
1644   case Builtin::BIfmaf:
1645   case Builtin::BIfmal:
1646   case Builtin::BI__builtin_fma:
1647   case Builtin::BI__builtin_fmaf:
1648   case Builtin::BI__builtin_fmal: {
1649     // Rewrite fma to intrinsic.
1650     Value *FirstArg = EmitScalarExpr(E->getArg(0));
1651     llvm::Type *ArgType = FirstArg->getType();
1652     Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1653     return RValue::get(
1654         Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1655                                EmitScalarExpr(E->getArg(2))}));
1656   }
1657 
1658   case Builtin::BI__builtin_signbit:
1659   case Builtin::BI__builtin_signbitf:
1660   case Builtin::BI__builtin_signbitl: {
1661     return RValue::get(
1662         Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1663                            ConvertType(E->getType())));
1664   }
1665   case Builtin::BI__builtin_annotation: {
1666     llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1667     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1668                                       AnnVal->getType());
1669 
1670     // Get the annotation string, go through casts. Sema requires this to be a
1671     // non-wide string literal, possibly wrapped in casts, so the cast<> is safe.
1672     const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1673     StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1674     return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1675   }
1676   case Builtin::BI__builtin_addcb:
1677   case Builtin::BI__builtin_addcs:
1678   case Builtin::BI__builtin_addc:
1679   case Builtin::BI__builtin_addcl:
1680   case Builtin::BI__builtin_addcll:
1681   case Builtin::BI__builtin_subcb:
1682   case Builtin::BI__builtin_subcs:
1683   case Builtin::BI__builtin_subc:
1684   case Builtin::BI__builtin_subcl:
1685   case Builtin::BI__builtin_subcll: {
1686 
1687     // We translate all of these builtins from expressions of the form:
1688     //   int x = ..., y = ..., carryin = ..., carryout, result;
1689     //   result = __builtin_addc(x, y, carryin, &carryout);
1690     //
1691     // to LLVM IR of the form:
1692     //
1693     //   %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1694     //   %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1695     //   %carry1 = extractvalue {i32, i1} %tmp1, 1
1696     //   %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1697     //                                                       i32 %carryin)
1698     //   %result = extractvalue {i32, i1} %tmp2, 0
1699     //   %carry2 = extractvalue {i32, i1} %tmp2, 1
1700     //   %tmp3 = or i1 %carry1, %carry2
1701     //   %tmp4 = zext i1 %tmp3 to i32
1702     //   store i32 %tmp4, i32* %carryout
1703 
1704     // Scalarize our inputs.
1705     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1706     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1707     llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1708     Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1709 
1710     // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1711     llvm::Intrinsic::ID IntrinsicId;
1712     switch (BuiltinID) {
1713     default: llvm_unreachable("Unknown multiprecision builtin id.");
1714     case Builtin::BI__builtin_addcb:
1715     case Builtin::BI__builtin_addcs:
1716     case Builtin::BI__builtin_addc:
1717     case Builtin::BI__builtin_addcl:
1718     case Builtin::BI__builtin_addcll:
1719       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1720       break;
1721     case Builtin::BI__builtin_subcb:
1722     case Builtin::BI__builtin_subcs:
1723     case Builtin::BI__builtin_subc:
1724     case Builtin::BI__builtin_subcl:
1725     case Builtin::BI__builtin_subcll:
1726       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1727       break;
1728     }
1729 
1730     // Construct our resulting LLVM IR expression.
1731     llvm::Value *Carry1;
1732     llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
1733                                               X, Y, Carry1);
1734     llvm::Value *Carry2;
1735     llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
1736                                               Sum1, Carryin, Carry2);
1737     llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
1738                                                X->getType());
1739     Builder.CreateStore(CarryOut, CarryOutPtr);
1740     return RValue::get(Sum2);
1741   }
1742 
1743   case Builtin::BI__builtin_add_overflow:
1744   case Builtin::BI__builtin_sub_overflow:
1745   case Builtin::BI__builtin_mul_overflow: {
1746     const clang::Expr *LeftArg = E->getArg(0);
1747     const clang::Expr *RightArg = E->getArg(1);
1748     const clang::Expr *ResultArg = E->getArg(2);
1749 
1750     clang::QualType ResultQTy =
1751         ResultArg->getType()->castAs<PointerType>()->getPointeeType();
1752 
1753     WidthAndSignedness LeftInfo =
1754         getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
1755     WidthAndSignedness RightInfo =
1756         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
1757     WidthAndSignedness ResultInfo =
1758         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
1759     WidthAndSignedness EncompassingInfo =
1760         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
1761 
1762     llvm::Type *EncompassingLLVMTy =
1763         llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
1764 
1765     llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
1766 
1767     llvm::Intrinsic::ID IntrinsicId;
1768     switch (BuiltinID) {
1769     default:
1770       llvm_unreachable("Unknown overflow builtin id.");
1771     case Builtin::BI__builtin_add_overflow:
1772       IntrinsicId = EncompassingInfo.Signed
1773                         ? llvm::Intrinsic::sadd_with_overflow
1774                         : llvm::Intrinsic::uadd_with_overflow;
1775       break;
1776     case Builtin::BI__builtin_sub_overflow:
1777       IntrinsicId = EncompassingInfo.Signed
1778                         ? llvm::Intrinsic::ssub_with_overflow
1779                         : llvm::Intrinsic::usub_with_overflow;
1780       break;
1781     case Builtin::BI__builtin_mul_overflow:
1782       IntrinsicId = EncompassingInfo.Signed
1783                         ? llvm::Intrinsic::smul_with_overflow
1784                         : llvm::Intrinsic::umul_with_overflow;
1785       break;
1786     }
1787 
1788     llvm::Value *Left = EmitScalarExpr(LeftArg);
1789     llvm::Value *Right = EmitScalarExpr(RightArg);
1790     Address ResultPtr = EmitPointerWithAlignment(ResultArg);
1791 
1792     // Extend each operand to the encompassing type.
1793     Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
1794     Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
1795 
1796     // Perform the operation on the extended values.
1797     llvm::Value *Overflow, *Result;
1798     Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
1799 
1800     if (EncompassingInfo.Width > ResultInfo.Width) {
1801       // The encompassing type is wider than the result type, so we need to
1802       // truncate it.
1803       llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
1804 
1805       // To see if the truncation caused an overflow, we will extend
1806       // the result and then compare it to the original result.
1807       llvm::Value *ResultTruncExt = Builder.CreateIntCast(
1808           ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
1809       llvm::Value *TruncationOverflow =
1810           Builder.CreateICmpNE(Result, ResultTruncExt);
1811 
1812       Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
1813       Result = ResultTrunc;
1814     }
1815 
1816     // Finally, store the result using the pointer.
1817     bool isVolatile =
1818       ResultArg->getType()->getPointeeType().isVolatileQualified();
1819     Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
1820 
1821     return RValue::get(Overflow);
1822   }
1823 
1824   case Builtin::BI__builtin_uadd_overflow:
1825   case Builtin::BI__builtin_uaddl_overflow:
1826   case Builtin::BI__builtin_uaddll_overflow:
1827   case Builtin::BI__builtin_usub_overflow:
1828   case Builtin::BI__builtin_usubl_overflow:
1829   case Builtin::BI__builtin_usubll_overflow:
1830   case Builtin::BI__builtin_umul_overflow:
1831   case Builtin::BI__builtin_umull_overflow:
1832   case Builtin::BI__builtin_umulll_overflow:
1833   case Builtin::BI__builtin_sadd_overflow:
1834   case Builtin::BI__builtin_saddl_overflow:
1835   case Builtin::BI__builtin_saddll_overflow:
1836   case Builtin::BI__builtin_ssub_overflow:
1837   case Builtin::BI__builtin_ssubl_overflow:
1838   case Builtin::BI__builtin_ssubll_overflow:
1839   case Builtin::BI__builtin_smul_overflow:
1840   case Builtin::BI__builtin_smull_overflow:
1841   case Builtin::BI__builtin_smulll_overflow: {
1842 
1843     // We translate all of these builtins directly to the relevant llvm IR node.
1844 
1845     // Scalarize our inputs.
1846     llvm::Value *X = EmitScalarExpr(E->getArg(0));
1847     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1848     Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
1849 
1850     // Decide which of the overflow intrinsics we are lowering to:
1851     llvm::Intrinsic::ID IntrinsicId;
1852     switch (BuiltinID) {
1853     default: llvm_unreachable("Unknown overflow builtin id.");
1854     case Builtin::BI__builtin_uadd_overflow:
1855     case Builtin::BI__builtin_uaddl_overflow:
1856     case Builtin::BI__builtin_uaddll_overflow:
1857       IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1858       break;
1859     case Builtin::BI__builtin_usub_overflow:
1860     case Builtin::BI__builtin_usubl_overflow:
1861     case Builtin::BI__builtin_usubll_overflow:
1862       IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1863       break;
1864     case Builtin::BI__builtin_umul_overflow:
1865     case Builtin::BI__builtin_umull_overflow:
1866     case Builtin::BI__builtin_umulll_overflow:
1867       IntrinsicId = llvm::Intrinsic::umul_with_overflow;
1868       break;
1869     case Builtin::BI__builtin_sadd_overflow:
1870     case Builtin::BI__builtin_saddl_overflow:
1871     case Builtin::BI__builtin_saddll_overflow:
1872       IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
1873       break;
1874     case Builtin::BI__builtin_ssub_overflow:
1875     case Builtin::BI__builtin_ssubl_overflow:
1876     case Builtin::BI__builtin_ssubll_overflow:
1877       IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
1878       break;
1879     case Builtin::BI__builtin_smul_overflow:
1880     case Builtin::BI__builtin_smull_overflow:
1881     case Builtin::BI__builtin_smulll_overflow:
1882       IntrinsicId = llvm::Intrinsic::smul_with_overflow;
1883       break;
1884     }
1885 
1886 
1887     llvm::Value *Carry;
1888     llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
1889     Builder.CreateStore(Sum, SumOutPtr);
1890 
1891     return RValue::get(Carry);
1892   }
1893   case Builtin::BI__builtin_addressof:
1894     return RValue::get(EmitLValue(E->getArg(0)).getPointer());
1895   case Builtin::BI__builtin_operator_new:
1896     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1897                                     E->getArg(0), false);
1898   case Builtin::BI__builtin_operator_delete:
1899     return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
1900                                     E->getArg(0), true);
1901   case Builtin::BI__noop:
1902     // __noop always evaluates to an integer literal zero.
1903     return RValue::get(ConstantInt::get(IntTy, 0));
1904   case Builtin::BI__builtin_call_with_static_chain: {
1905     const CallExpr *Call = cast<CallExpr>(E->getArg(0));
1906     const Expr *Chain = E->getArg(1);
1907     return EmitCall(Call->getCallee()->getType(),
1908                     EmitScalarExpr(Call->getCallee()), Call, ReturnValue,
1909                     Call->getCalleeDecl(), EmitScalarExpr(Chain));
1910   }
1911   case Builtin::BI_InterlockedExchange:
1912   case Builtin::BI_InterlockedExchangePointer:
1913     return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1914   case Builtin::BI_InterlockedCompareExchangePointer: {
1915     llvm::Type *RTy;
1916     llvm::IntegerType *IntType =
1917       IntegerType::get(getLLVMContext(),
1918                        getContext().getTypeSize(E->getType()));
1919     llvm::Type *IntPtrType = IntType->getPointerTo();
1920 
1921     llvm::Value *Destination =
1922       Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
1923 
1924     llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
1925     RTy = Exchange->getType();
1926     Exchange = Builder.CreatePtrToInt(Exchange, IntType);
1927 
1928     llvm::Value *Comparand =
1929       Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
1930 
1931     auto Result =
1932         Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
1933                                     AtomicOrdering::SequentiallyConsistent,
1934                                     AtomicOrdering::SequentiallyConsistent);
1935     Result->setVolatile(true);
1936 
1937     return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
1938                                                                          0),
1939                                               RTy));
1940   }
1941   case Builtin::BI_InterlockedCompareExchange: {
1942     AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
1943         EmitScalarExpr(E->getArg(0)),
1944         EmitScalarExpr(E->getArg(2)),
1945         EmitScalarExpr(E->getArg(1)),
1946         AtomicOrdering::SequentiallyConsistent,
1947         AtomicOrdering::SequentiallyConsistent);
1948       CXI->setVolatile(true);
1949       return RValue::get(Builder.CreateExtractValue(CXI, 0));
1950   }
1951   case Builtin::BI_InterlockedIncrement: {
1952     llvm::Type *IntTy = ConvertType(E->getType());
1953     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1954       AtomicRMWInst::Add,
1955       EmitScalarExpr(E->getArg(0)),
1956       ConstantInt::get(IntTy, 1),
1957       llvm::AtomicOrdering::SequentiallyConsistent);
1958     RMWI->setVolatile(true);
1959     return RValue::get(Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1)));
1960   }
1961   case Builtin::BI_InterlockedDecrement: {
1962     llvm::Type *IntTy = ConvertType(E->getType());
1963     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1964       AtomicRMWInst::Sub,
1965       EmitScalarExpr(E->getArg(0)),
1966       ConstantInt::get(IntTy, 1),
1967       llvm::AtomicOrdering::SequentiallyConsistent);
1968     RMWI->setVolatile(true);
1969     return RValue::get(Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1)));
1970   }
1971   case Builtin::BI_InterlockedExchangeAdd: {
1972     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
1973       AtomicRMWInst::Add,
1974       EmitScalarExpr(E->getArg(0)),
1975       EmitScalarExpr(E->getArg(1)),
1976       llvm::AtomicOrdering::SequentiallyConsistent);
1977     RMWI->setVolatile(true);
1978     return RValue::get(RMWI);
1979   }
1980   case Builtin::BI__readfsdword: {
1981     llvm::Type *IntTy = ConvertType(E->getType());
1982     Value *IntToPtr =
1983       Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
1984                              llvm::PointerType::get(IntTy, 257));
1985     LoadInst *Load =
1986         Builder.CreateDefaultAlignedLoad(IntToPtr, /*isVolatile=*/true);
1987     return RValue::get(Load);
1988   }
1989 
1990   case Builtin::BI__exception_code:
1991   case Builtin::BI_exception_code:
1992     return RValue::get(EmitSEHExceptionCode());
1993   case Builtin::BI__exception_info:
1994   case Builtin::BI_exception_info:
1995     return RValue::get(EmitSEHExceptionInfo());
1996   case Builtin::BI__abnormal_termination:
1997   case Builtin::BI_abnormal_termination:
1998     return RValue::get(EmitSEHAbnormalTermination());
1999   case Builtin::BI_setjmpex: {
2000     if (getTarget().getTriple().isOSMSVCRT()) {
2001       llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2002       llvm::AttributeSet ReturnsTwiceAttr =
2003           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2004                             llvm::Attribute::ReturnsTwice);
2005       llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2006           llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2007           "_setjmpex", ReturnsTwiceAttr);
2008       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2009           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2010       llvm::Value *FrameAddr =
2011           Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2012                              ConstantInt::get(Int32Ty, 0));
2013       llvm::Value *Args[] = {Buf, FrameAddr};
2014       llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2015       CS.setAttributes(ReturnsTwiceAttr);
2016       return RValue::get(CS.getInstruction());
2017     }
2018     break;
2019   }
2020   case Builtin::BI_setjmp: {
2021     if (getTarget().getTriple().isOSMSVCRT()) {
2022       llvm::AttributeSet ReturnsTwiceAttr =
2023           AttributeSet::get(getLLVMContext(), llvm::AttributeSet::FunctionIndex,
2024                             llvm::Attribute::ReturnsTwice);
2025       llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2026           EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2027       llvm::CallSite CS;
2028       if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2029         llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2030         llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2031             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2032             "_setjmp3", ReturnsTwiceAttr);
2033         llvm::Value *Count = ConstantInt::get(IntTy, 0);
2034         llvm::Value *Args[] = {Buf, Count};
2035         CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2036       } else {
2037         llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2038         llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2039             llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2040             "_setjmp", ReturnsTwiceAttr);
2041         llvm::Value *FrameAddr =
2042             Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2043                                ConstantInt::get(Int32Ty, 0));
2044         llvm::Value *Args[] = {Buf, FrameAddr};
2045         CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2046       }
2047       CS.setAttributes(ReturnsTwiceAttr);
2048       return RValue::get(CS.getInstruction());
2049     }
2050     break;
2051   }
2052 
2053   case Builtin::BI__GetExceptionInfo: {
2054     if (llvm::GlobalVariable *GV =
2055             CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2056       return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2057     break;
2058   }
2059 
2060   // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2061   case Builtin::BIread_pipe:
2062   case Builtin::BIwrite_pipe: {
2063     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2064           *Arg1 = EmitScalarExpr(E->getArg(1));
2065 
2066     // Type of the generic packet parameter.
2067     unsigned GenericAS =
2068         getContext().getTargetAddressSpace(LangAS::opencl_generic);
2069     llvm::Type *I8PTy = llvm::PointerType::get(
2070         llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2071 
2072     // Testing which overloaded version we should generate the call for.
2073     if (2U == E->getNumArgs()) {
2074       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2075                                                              : "__write_pipe_2";
2076       // Creating a generic function type to be able to call with any builtin or
2077       // user defined type.
2078       llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy};
2079       llvm::FunctionType *FTy = llvm::FunctionType::get(
2080           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2081       Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2082       return RValue::get(Builder.CreateCall(
2083           CGM.CreateRuntimeFunction(FTy, Name), {Arg0, BCast}));
2084     } else {
2085       assert(4 == E->getNumArgs() &&
2086              "Illegal number of parameters to pipe function");
2087       const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2088                                                              : "__write_pipe_4";
2089 
2090       llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy};
2091       Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2092             *Arg3 = EmitScalarExpr(E->getArg(3));
2093       llvm::FunctionType *FTy = llvm::FunctionType::get(
2094           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2095       Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2096       // We know the third argument is an integer type, but we may need to cast
2097       // it to i32.
2098       if (Arg2->getType() != Int32Ty)
2099         Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2100       return RValue::get(Builder.CreateCall(
2101           CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1, Arg2, BCast}));
2102     }
2103   }
2104   // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2105   // functions
2106   case Builtin::BIreserve_read_pipe:
2107   case Builtin::BIreserve_write_pipe:
2108   case Builtin::BIwork_group_reserve_read_pipe:
2109   case Builtin::BIwork_group_reserve_write_pipe:
2110   case Builtin::BIsub_group_reserve_read_pipe:
2111   case Builtin::BIsub_group_reserve_write_pipe: {
2112     // Composing the mangled name for the function.
2113     const char *Name;
2114     if (BuiltinID == Builtin::BIreserve_read_pipe)
2115       Name = "__reserve_read_pipe";
2116     else if (BuiltinID == Builtin::BIreserve_write_pipe)
2117       Name = "__reserve_write_pipe";
2118     else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2119       Name = "__work_group_reserve_read_pipe";
2120     else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2121       Name = "__work_group_reserve_write_pipe";
2122     else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2123       Name = "__sub_group_reserve_read_pipe";
2124     else
2125       Name = "__sub_group_reserve_write_pipe";
2126 
2127     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2128           *Arg1 = EmitScalarExpr(E->getArg(1));
2129     llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2130 
2131     // Building the generic function prototype.
2132     llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty};
2133     llvm::FunctionType *FTy = llvm::FunctionType::get(
2134         ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2135     // We know the second argument is an integer type, but we may need to cast
2136     // it to i32.
2137     if (Arg1->getType() != Int32Ty)
2138       Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2139     return RValue::get(
2140         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2141   }
2142   // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2143   // functions
2144   case Builtin::BIcommit_read_pipe:
2145   case Builtin::BIcommit_write_pipe:
2146   case Builtin::BIwork_group_commit_read_pipe:
2147   case Builtin::BIwork_group_commit_write_pipe:
2148   case Builtin::BIsub_group_commit_read_pipe:
2149   case Builtin::BIsub_group_commit_write_pipe: {
2150     const char *Name;
2151     if (BuiltinID == Builtin::BIcommit_read_pipe)
2152       Name = "__commit_read_pipe";
2153     else if (BuiltinID == Builtin::BIcommit_write_pipe)
2154       Name = "__commit_write_pipe";
2155     else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2156       Name = "__work_group_commit_read_pipe";
2157     else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2158       Name = "__work_group_commit_write_pipe";
2159     else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2160       Name = "__sub_group_commit_read_pipe";
2161     else
2162       Name = "__sub_group_commit_write_pipe";
2163 
2164     Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2165           *Arg1 = EmitScalarExpr(E->getArg(1));
2166 
2167     // Building the generic function prototype.
2168     llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType()};
2169     llvm::FunctionType *FTy =
2170         llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2171                                 llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2172 
2173     return RValue::get(
2174         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0, Arg1}));
2175   }
2176   // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2177   case Builtin::BIget_pipe_num_packets:
2178   case Builtin::BIget_pipe_max_packets: {
2179     const char *Name;
2180     if (BuiltinID == Builtin::BIget_pipe_num_packets)
2181       Name = "__get_pipe_num_packets";
2182     else
2183       Name = "__get_pipe_max_packets";
2184 
2185     // Building the generic function prototype.
2186     Value *Arg0 = EmitScalarExpr(E->getArg(0));
2187     llvm::Type *ArgTys[] = {Arg0->getType()};
2188     llvm::FunctionType *FTy = llvm::FunctionType::get(
2189         Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2190 
2191     return RValue::get(
2192         Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name), {Arg0}));
2193   }
2194 
2195   // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2196   case Builtin::BIto_global:
2197   case Builtin::BIto_local:
2198   case Builtin::BIto_private: {
2199     auto Arg0 = EmitScalarExpr(E->getArg(0));
2200     auto NewArgT = llvm::PointerType::get(Int8Ty,
2201       CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2202     auto NewRetT = llvm::PointerType::get(Int8Ty,
2203       CGM.getContext().getTargetAddressSpace(
2204         E->getType()->getPointeeType().getAddressSpace()));
2205     auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2206     llvm::Value *NewArg;
2207     if (Arg0->getType()->getPointerAddressSpace() !=
2208         NewArgT->getPointerAddressSpace())
2209       NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2210     else
2211       NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2212     auto NewCall = Builder.CreateCall(CGM.CreateRuntimeFunction(FTy,
2213       E->getDirectCallee()->getName()), {NewArg});
2214     return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2215       ConvertType(E->getType())));
2216   }
2217 
2218   // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2219   // It contains four different overload formats specified in Table 6.13.17.1.
2220   case Builtin::BIenqueue_kernel: {
2221     StringRef Name; // Generated function call name
2222     unsigned NumArgs = E->getNumArgs();
2223 
2224     llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2225     llvm::Type *RangeTy = ConvertType(getContext().OCLNDRangeTy);
2226 
2227     llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2228     llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2229     llvm::Value *Range = EmitScalarExpr(E->getArg(2));
2230 
2231     if (NumArgs == 4) {
2232       // The most basic form of the call with parameters:
2233       // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2234       Name = "__enqueue_kernel_basic";
2235       llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, Int8PtrTy};
2236       llvm::FunctionType *FTy = llvm::FunctionType::get(
2237           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2238 
2239       llvm::Value *Block =
2240           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2241 
2242       return RValue::get(Builder.CreateCall(
2243           CGM.CreateRuntimeFunction(FTy, Name), {Queue, Flags, Range, Block}));
2244     }
2245     assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2246 
2247     // Could have events and/or vaargs.
2248     if (E->getArg(3)->getType()->isBlockPointerType()) {
2249       // No events passed, but has variadic arguments.
2250       Name = "__enqueue_kernel_vaargs";
2251       llvm::Value *Block =
2252           Builder.CreateBitCast(EmitScalarExpr(E->getArg(3)), Int8PtrTy);
2253       // Create a vector of the arguments, as well as a constant value to
2254       // express to the runtime the number of variadic arguments.
2255       std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2256                                          ConstantInt::get(IntTy, NumArgs - 4)};
2257       std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy, Int8PtrTy,
2258                                           IntTy};
2259 
2260       // Add the variadics.
2261       for (unsigned I = 4; I < NumArgs; ++I) {
2262         llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2263         unsigned TypeSizeInBytes =
2264             getContext()
2265                 .getTypeSizeInChars(E->getArg(I)->getType())
2266                 .getQuantity();
2267         Args.push_back(TypeSizeInBytes < 4
2268                            ? Builder.CreateZExt(ArgSize, Int32Ty)
2269                            : ArgSize);
2270       }
2271 
2272       llvm::FunctionType *FTy = llvm::FunctionType::get(
2273           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2274       return RValue::get(
2275           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2276                              llvm::ArrayRef<llvm::Value *>(Args)));
2277     }
2278     // Any calls now have event arguments passed.
2279     if (NumArgs >= 7) {
2280       llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2281       unsigned AS4 =
2282           E->getArg(4)->getType()->isArrayType()
2283               ? E->getArg(4)->getType().getAddressSpace()
2284               : E->getArg(4)->getType()->getPointeeType().getAddressSpace();
2285       llvm::Type *EventPtrAS4Ty =
2286           EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS4));
2287       unsigned AS5 =
2288           E->getArg(5)->getType()->getPointeeType().getAddressSpace();
2289       llvm::Type *EventPtrAS5Ty =
2290           EventTy->getPointerTo(CGM.getContext().getTargetAddressSpace(AS5));
2291 
2292       llvm::Value *NumEvents = EmitScalarExpr(E->getArg(3));
2293       llvm::Value *EventList =
2294           E->getArg(4)->getType()->isArrayType()
2295               ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2296               : EmitScalarExpr(E->getArg(4));
2297       llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2298       llvm::Value *Block =
2299           Builder.CreateBitCast(EmitScalarExpr(E->getArg(6)), Int8PtrTy);
2300 
2301       std::vector<llvm::Type *> ArgTys = {
2302           QueueTy,       Int32Ty,       RangeTy,  Int32Ty,
2303           EventPtrAS4Ty, EventPtrAS5Ty, Int8PtrTy};
2304       std::vector<llvm::Value *> Args = {Queue,     Flags,    Range, NumEvents,
2305                                          EventList, ClkEvent, Block};
2306 
2307       if (NumArgs == 7) {
2308         // Has events but no variadics.
2309         Name = "__enqueue_kernel_basic_events";
2310         llvm::FunctionType *FTy = llvm::FunctionType::get(
2311             Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2312         return RValue::get(
2313             Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2314                                llvm::ArrayRef<llvm::Value *>(Args)));
2315       }
2316       // Has event info and variadics
2317       // Pass the number of variadics to the runtime function too.
2318       Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2319       ArgTys.push_back(Int32Ty);
2320       Name = "__enqueue_kernel_events_vaargs";
2321 
2322       // Add the variadics.
2323       for (unsigned I = 7; I < NumArgs; ++I) {
2324         llvm::Value *ArgSize = EmitScalarExpr(E->getArg(I));
2325         unsigned TypeSizeInBytes =
2326             getContext()
2327                 .getTypeSizeInChars(E->getArg(I)->getType())
2328                 .getQuantity();
2329         Args.push_back(TypeSizeInBytes < 4
2330                            ? Builder.CreateZExt(ArgSize, Int32Ty)
2331                            : ArgSize);
2332       }
2333       llvm::FunctionType *FTy = llvm::FunctionType::get(
2334           Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2335       return RValue::get(
2336           Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2337                              llvm::ArrayRef<llvm::Value *>(Args)));
2338     }
2339   }
2340   // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2341   // parameter.
2342   case Builtin::BIget_kernel_work_group_size: {
2343     Value *Arg = EmitScalarExpr(E->getArg(0));
2344     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2345     return RValue::get(
2346         Builder.CreateCall(CGM.CreateRuntimeFunction(
2347                                llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2348                                "__get_kernel_work_group_size_impl"),
2349                            Arg));
2350   }
2351   case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2352     Value *Arg = EmitScalarExpr(E->getArg(0));
2353     Arg = Builder.CreateBitCast(Arg, Int8PtrTy);
2354     return RValue::get(Builder.CreateCall(
2355         CGM.CreateRuntimeFunction(
2356             llvm::FunctionType::get(IntTy, Int8PtrTy, false),
2357             "__get_kernel_preferred_work_group_multiple_impl"),
2358         Arg));
2359   }
2360   case Builtin::BIprintf:
2361     if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice)
2362       return EmitCUDADevicePrintfCallExpr(E, ReturnValue);
2363     break;
2364   case Builtin::BI__builtin_canonicalize:
2365   case Builtin::BI__builtin_canonicalizef:
2366   case Builtin::BI__builtin_canonicalizel:
2367     return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2368 
2369   case Builtin::BI__builtin_thread_pointer: {
2370     if (!getContext().getTargetInfo().isTLSSupported())
2371       CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2372     // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2373     break;
2374   }
2375   }
2376 
2377   // If this is an alias for a lib function (e.g. __builtin_sin), emit
2378   // the call using the normal call path, but using the unmangled
2379   // version of the function name.
2380   if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2381     return emitLibraryCall(*this, FD, E,
2382                            CGM.getBuiltinLibFunction(FD, BuiltinID));
2383 
2384   // If this is a predefined lib function (e.g. malloc), emit the call
2385   // using exactly the normal call path.
2386   if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2387     return emitLibraryCall(*this, FD, E, EmitScalarExpr(E->getCallee()));
2388 
2389   // Check that a call to a target specific builtin has the correct target
2390   // features.
2391   // This is down here to avoid non-target specific builtins, however, if
2392   // generic builtins start to require generic target features then we
2393   // can move this up to the beginning of the function.
2394   checkTargetFeatures(E, FD);
2395 
2396   // See if we have a target specific intrinsic.
2397   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2398   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2399   if (const char *Prefix =
2400           llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch())) {
2401     IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix, Name);
2402     // NOTE we dont need to perform a compatibility flag check here since the
2403     // intrinsics are declared in Builtins*.def via LANGBUILTIN which filter the
2404     // MS builtins via ALL_MS_LANGUAGES and are filtered earlier.
2405     if (IntrinsicID == Intrinsic::not_intrinsic)
2406       IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix, Name);
2407   }
2408 
2409   if (IntrinsicID != Intrinsic::not_intrinsic) {
2410     SmallVector<Value*, 16> Args;
2411 
2412     // Find out if any arguments are required to be integer constant
2413     // expressions.
2414     unsigned ICEArguments = 0;
2415     ASTContext::GetBuiltinTypeError Error;
2416     getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2417     assert(Error == ASTContext::GE_None && "Should not codegen an error");
2418 
2419     Function *F = CGM.getIntrinsic(IntrinsicID);
2420     llvm::FunctionType *FTy = F->getFunctionType();
2421 
2422     for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2423       Value *ArgValue;
2424       // If this is a normal argument, just emit it as a scalar.
2425       if ((ICEArguments & (1 << i)) == 0) {
2426         ArgValue = EmitScalarExpr(E->getArg(i));
2427       } else {
2428         // If this is required to be a constant, constant fold it so that we
2429         // know that the generated intrinsic gets a ConstantInt.
2430         llvm::APSInt Result;
2431         bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2432         assert(IsConst && "Constant arg isn't actually constant?");
2433         (void)IsConst;
2434         ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2435       }
2436 
2437       // If the intrinsic arg type is different from the builtin arg type
2438       // we need to do a bit cast.
2439       llvm::Type *PTy = FTy->getParamType(i);
2440       if (PTy != ArgValue->getType()) {
2441         assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2442                "Must be able to losslessly bit cast to param");
2443         ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2444       }
2445 
2446       Args.push_back(ArgValue);
2447     }
2448 
2449     Value *V = Builder.CreateCall(F, Args);
2450     QualType BuiltinRetType = E->getType();
2451 
2452     llvm::Type *RetTy = VoidTy;
2453     if (!BuiltinRetType->isVoidType())
2454       RetTy = ConvertType(BuiltinRetType);
2455 
2456     if (RetTy != V->getType()) {
2457       assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2458              "Must be able to losslessly bit cast result type");
2459       V = Builder.CreateBitCast(V, RetTy);
2460     }
2461 
2462     return RValue::get(V);
2463   }
2464 
2465   // See if we have a target specific builtin that needs to be lowered.
2466   if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2467     return RValue::get(V);
2468 
2469   ErrorUnsupported(E, "builtin function");
2470 
2471   // Unknown builtin, for now just dump it out and return undef.
2472   return GetUndefRValue(E->getType());
2473 }
2474 
EmitTargetArchBuiltinExpr(CodeGenFunction * CGF,unsigned BuiltinID,const CallExpr * E,llvm::Triple::ArchType Arch)2475 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2476                                         unsigned BuiltinID, const CallExpr *E,
2477                                         llvm::Triple::ArchType Arch) {
2478   switch (Arch) {
2479   case llvm::Triple::arm:
2480   case llvm::Triple::armeb:
2481   case llvm::Triple::thumb:
2482   case llvm::Triple::thumbeb:
2483     return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2484   case llvm::Triple::aarch64:
2485   case llvm::Triple::aarch64_be:
2486     return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2487   case llvm::Triple::x86:
2488   case llvm::Triple::x86_64:
2489     return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2490   case llvm::Triple::ppc:
2491   case llvm::Triple::ppc64:
2492   case llvm::Triple::ppc64le:
2493     return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2494   case llvm::Triple::r600:
2495   case llvm::Triple::amdgcn:
2496     return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2497   case llvm::Triple::systemz:
2498     return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2499   case llvm::Triple::nvptx:
2500   case llvm::Triple::nvptx64:
2501     return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2502   case llvm::Triple::wasm32:
2503   case llvm::Triple::wasm64:
2504     return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2505   default:
2506     return nullptr;
2507   }
2508 }
2509 
EmitTargetBuiltinExpr(unsigned BuiltinID,const CallExpr * E)2510 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2511                                               const CallExpr *E) {
2512   if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2513     assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2514     return EmitTargetArchBuiltinExpr(
2515         this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2516         getContext().getAuxTargetInfo()->getTriple().getArch());
2517   }
2518 
2519   return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2520                                    getTarget().getTriple().getArch());
2521 }
2522 
GetNeonType(CodeGenFunction * CGF,NeonTypeFlags TypeFlags,bool V1Ty=false)2523 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2524                                      NeonTypeFlags TypeFlags,
2525                                      bool V1Ty=false) {
2526   int IsQuad = TypeFlags.isQuad();
2527   switch (TypeFlags.getEltType()) {
2528   case NeonTypeFlags::Int8:
2529   case NeonTypeFlags::Poly8:
2530     return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2531   case NeonTypeFlags::Int16:
2532   case NeonTypeFlags::Poly16:
2533   case NeonTypeFlags::Float16:
2534     return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2535   case NeonTypeFlags::Int32:
2536     return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2537   case NeonTypeFlags::Int64:
2538   case NeonTypeFlags::Poly64:
2539     return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2540   case NeonTypeFlags::Poly128:
2541     // FIXME: i128 and f128 doesn't get fully support in Clang and llvm.
2542     // There is a lot of i128 and f128 API missing.
2543     // so we use v16i8 to represent poly128 and get pattern matched.
2544     return llvm::VectorType::get(CGF->Int8Ty, 16);
2545   case NeonTypeFlags::Float32:
2546     return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2547   case NeonTypeFlags::Float64:
2548     return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
2549   }
2550   llvm_unreachable("Unknown vector element type!");
2551 }
2552 
GetFloatNeonType(CodeGenFunction * CGF,NeonTypeFlags IntTypeFlags)2553 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
2554                                           NeonTypeFlags IntTypeFlags) {
2555   int IsQuad = IntTypeFlags.isQuad();
2556   switch (IntTypeFlags.getEltType()) {
2557   case NeonTypeFlags::Int32:
2558     return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
2559   case NeonTypeFlags::Int64:
2560     return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
2561   default:
2562     llvm_unreachable("Type can't be converted to floating-point!");
2563   }
2564 }
2565 
EmitNeonSplat(Value * V,Constant * C)2566 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
2567   unsigned nElts = V->getType()->getVectorNumElements();
2568   Value* SV = llvm::ConstantVector::getSplat(nElts, C);
2569   return Builder.CreateShuffleVector(V, V, SV, "lane");
2570 }
2571 
EmitNeonCall(Function * F,SmallVectorImpl<Value * > & Ops,const char * name,unsigned shift,bool rightshift)2572 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
2573                                      const char *name,
2574                                      unsigned shift, bool rightshift) {
2575   unsigned j = 0;
2576   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
2577        ai != ae; ++ai, ++j)
2578     if (shift > 0 && shift == j)
2579       Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
2580     else
2581       Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
2582 
2583   return Builder.CreateCall(F, Ops, name);
2584 }
2585 
EmitNeonShiftVector(Value * V,llvm::Type * Ty,bool neg)2586 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
2587                                             bool neg) {
2588   int SV = cast<ConstantInt>(V)->getSExtValue();
2589   return ConstantInt::get(Ty, neg ? -SV : SV);
2590 }
2591 
2592 // \brief Right-shift a vector by a constant.
EmitNeonRShiftImm(Value * Vec,Value * Shift,llvm::Type * Ty,bool usgn,const char * name)2593 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
2594                                           llvm::Type *Ty, bool usgn,
2595                                           const char *name) {
2596   llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
2597 
2598   int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
2599   int EltSize = VTy->getScalarSizeInBits();
2600 
2601   Vec = Builder.CreateBitCast(Vec, Ty);
2602 
2603   // lshr/ashr are undefined when the shift amount is equal to the vector
2604   // element size.
2605   if (ShiftAmt == EltSize) {
2606     if (usgn) {
2607       // Right-shifting an unsigned value by its size yields 0.
2608       return llvm::ConstantAggregateZero::get(VTy);
2609     } else {
2610       // Right-shifting a signed value by its size is equivalent
2611       // to a shift of size-1.
2612       --ShiftAmt;
2613       Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
2614     }
2615   }
2616 
2617   Shift = EmitNeonShiftVector(Shift, Ty, false);
2618   if (usgn)
2619     return Builder.CreateLShr(Vec, Shift, name);
2620   else
2621     return Builder.CreateAShr(Vec, Shift, name);
2622 }
2623 
/// Bit flags that can be OR-ed together; ARMSIMDIntrinsicMap entries pass
/// them as the TypeModifier argument of the NEONMAP macros.
enum {
  // Individual flag bits, one bit each.
  AddRetType = 0x001,
  Add1ArgType = 0x002,
  Add2ArgTypes = 0x004,

  VectorizeRetType = 0x008,
  VectorizeArgTypes = 0x010,

  InventFloatType = 0x020,
  UnsignedAlts = 0x040,

  Use64BitVectors = 0x080,
  Use128BitVectors = 0x100,

  // Named combinations of the bits above.
  Vectorize1ArgType = VectorizeArgTypes | Add1ArgType,
  VectorRet = VectorizeRetType | AddRetType,
  VectorRetGetArgs01 = VectorRet | Add2ArgTypes | VectorizeArgTypes,
  FpCmpzModifiers = VectorRet | Add1ArgType | InventFloatType
};
2645 
namespace {
/// One row of a NEON builtin table such as ARMSIMDIntrinsicMap.
///
/// Rows are brace-initialized by the NEONMAP0/1/2 macros, so the member
/// order below must not change.
struct NeonIntrinsicInfo {
  const char *NameHint;      // Stringified builtin base name.
  unsigned BuiltinID;        // NEON::BI__builtin_neon_<name>; the sort key.
  unsigned LLVMIntrinsic;    // Intrinsic ID, or 0 for NEONMAP0 rows.
  unsigned AltLLVMIntrinsic; // Second intrinsic ID; 0 unless NEONMAP2.
  unsigned TypeModifier;     // OR-ed modifier flags, or 0.

  // Rows are ordered by BuiltinID alone; presumably this supports searching
  // a sorted table by builtin ID -- confirm against the lookup code.
  bool operator<(unsigned WantedID) const {
    return WantedID > BuiltinID;
  }
  bool operator<(const NeonIntrinsicInfo &Other) const {
    return Other.BuiltinID > BuiltinID;
  }
};
} // end anonymous namespace
2662 
// Table-row builders for NeonIntrinsicInfo arrays. Each expands to a braced
// initializer of the form
//   { "<name>", NEON::BI__builtin_neon_<name>, <intrinsic>, <alt>, <flags> }.

// Row with no intrinsic and no modifier flags.
#define NEONMAP0(Stem)                                                         \
  { #Stem, NEON::BI__builtin_neon_ ## Stem, 0, 0, 0 }

// Row with a single intrinsic; the alternate slot is left as 0.
#define NEONMAP1(Stem, PrimaryID, Flags)                                       \
  { #Stem, NEON:: BI__builtin_neon_ ## Stem, Intrinsic::PrimaryID, 0, Flags }

// Row with both a primary and an alternate intrinsic.
#define NEONMAP2(Stem, PrimaryID, SecondaryID, Flags)                          \
  { #Stem, NEON:: BI__builtin_neon_ ## Stem, Intrinsic::PrimaryID,             \
      Intrinsic::SecondaryID, Flags }
2674 
2675 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
2676   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2677   NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
2678   NEONMAP1(vabs_v, arm_neon_vabs, 0),
2679   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
2680   NEONMAP0(vaddhn_v),
2681   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
2682   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
2683   NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
2684   NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
2685   NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
2686   NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
2687   NEONMAP1(vcage_v, arm_neon_vacge, 0),
2688   NEONMAP1(vcageq_v, arm_neon_vacge, 0),
2689   NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
2690   NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
2691   NEONMAP1(vcale_v, arm_neon_vacge, 0),
2692   NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
2693   NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
2694   NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
2695   NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
2696   NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
2697   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2698   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2699   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2700   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2701   NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
2702   NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
2703   NEONMAP0(vcvt_f32_v),
2704   NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2705   NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2706   NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2707   NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2708   NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2709   NEONMAP0(vcvt_s32_v),
2710   NEONMAP0(vcvt_s64_v),
2711   NEONMAP0(vcvt_u32_v),
2712   NEONMAP0(vcvt_u64_v),
2713   NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
2714   NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
2715   NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
2716   NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
2717   NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
2718   NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
2719   NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
2720   NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
2721   NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
2722   NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
2723   NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
2724   NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
2725   NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
2726   NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
2727   NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
2728   NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
2729   NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
2730   NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
2731   NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
2732   NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
2733   NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
2734   NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
2735   NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
2736   NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
2737   NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
2738   NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
2739   NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
2740   NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
2741   NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
2742   NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
2743   NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
2744   NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
2745   NEONMAP0(vcvtq_f32_v),
2746   NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
2747   NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
2748   NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
2749   NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
2750   NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
2751   NEONMAP0(vcvtq_s32_v),
2752   NEONMAP0(vcvtq_s64_v),
2753   NEONMAP0(vcvtq_u32_v),
2754   NEONMAP0(vcvtq_u64_v),
2755   NEONMAP0(vext_v),
2756   NEONMAP0(vextq_v),
2757   NEONMAP0(vfma_v),
2758   NEONMAP0(vfmaq_v),
2759   NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2760   NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
2761   NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2762   NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
2763   NEONMAP0(vld1_dup_v),
2764   NEONMAP1(vld1_v, arm_neon_vld1, 0),
2765   NEONMAP0(vld1q_dup_v),
2766   NEONMAP1(vld1q_v, arm_neon_vld1, 0),
2767   NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
2768   NEONMAP1(vld2_v, arm_neon_vld2, 0),
2769   NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
2770   NEONMAP1(vld2q_v, arm_neon_vld2, 0),
2771   NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
2772   NEONMAP1(vld3_v, arm_neon_vld3, 0),
2773   NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
2774   NEONMAP1(vld3q_v, arm_neon_vld3, 0),
2775   NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
2776   NEONMAP1(vld4_v, arm_neon_vld4, 0),
2777   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
2778   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
2779   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2780   NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
2781   NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
2782   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
2783   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2784   NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
2785   NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
2786   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
2787   NEONMAP0(vmovl_v),
2788   NEONMAP0(vmovn_v),
2789   NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
2790   NEONMAP0(vmull_v),
2791   NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
2792   NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2793   NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
2794   NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
2795   NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2796   NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
2797   NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
2798   NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
2799   NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
2800   NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
2801   NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
2802   NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2803   NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
2804   NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
2805   NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
2806   NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
2807   NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
2808   NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
2809   NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
2810   NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
2811   NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
2812   NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
2813   NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
2814   NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
2815   NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2816   NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
2817   NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2818   NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2819   NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
2820   NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
2821   NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
2822   NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
2823   NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2824   NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
2825   NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
2826   NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2827   NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
2828   NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
2829   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
2830   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2831   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
2832   NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
2833   NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
2834   NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
2835   NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
2836   NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
2837   NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
2838   NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
2839   NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
2840   NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
2841   NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
2842   NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
2843   NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
2844   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2845   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
2846   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2847   NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
2848   NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2849   NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
2850   NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
2851   NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
2852   NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
2853   NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
2854   NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
2855   NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
2856   NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
2857   NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
2858   NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
2859   NEONMAP0(vshl_n_v),
2860   NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2861   NEONMAP0(vshll_n_v),
2862   NEONMAP0(vshlq_n_v),
2863   NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
2864   NEONMAP0(vshr_n_v),
2865   NEONMAP0(vshrn_n_v),
2866   NEONMAP0(vshrq_n_v),
2867   NEONMAP1(vst1_v, arm_neon_vst1, 0),
2868   NEONMAP1(vst1q_v, arm_neon_vst1, 0),
2869   NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
2870   NEONMAP1(vst2_v, arm_neon_vst2, 0),
2871   NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
2872   NEONMAP1(vst2q_v, arm_neon_vst2, 0),
2873   NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
2874   NEONMAP1(vst3_v, arm_neon_vst3, 0),
2875   NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
2876   NEONMAP1(vst3q_v, arm_neon_vst3, 0),
2877   NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
2878   NEONMAP1(vst4_v, arm_neon_vst4, 0),
2879   NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
2880   NEONMAP1(vst4q_v, arm_neon_vst4, 0),
2881   NEONMAP0(vsubhn_v),
2882   NEONMAP0(vtrn_v),
2883   NEONMAP0(vtrnq_v),
2884   NEONMAP0(vtst_v),
2885   NEONMAP0(vtstq_v),
2886   NEONMAP0(vuzp_v),
2887   NEONMAP0(vuzpq_v),
2888   NEONMAP0(vzip_v),
2889   NEONMAP0(vzipq_v)
2890 };
2891 
/// Table of NeonIntrinsicInfo rows for the AArch64 vector NEON builtins (the
/// "_v"-suffixed overloaded forms plus a few fixed-type conversions), built
/// with the NEONMAP0/NEONMAP1/NEONMAP2 helper macros.
///
/// As written here, a NEONMAP1 row gives (builtin, LLVM intrinsic, modifier
/// flags), a NEONMAP2 row gives two intrinsics before the flags, and a
/// NEONMAP0 row gives the builtin name only.
/// NOTE(review): the macro definitions are outside this excerpt; presumably
/// NEONMAP0 builtins are lowered by dedicated code and the second NEONMAP2
/// intrinsic is an alternate picked by the caller (rows flagged UnsignedAlts
/// list a u*/s* pair) -- confirm against the macros and their users.
///
/// Keep the rows sorted. findNeonIntrinsicInMap() locates a row with
/// std::lower_bound and, in builds without NDEBUG, asserts std::is_sorted on
/// the table it is given.
/// NOTE(review): presumably this table is searched through that function and
/// the sort key is the builtin ID, which is why rows appear alphabetically by
/// builtin name -- confirm before inserting a row.
2892 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
2893   NEONMAP1(vabs_v, aarch64_neon_abs, 0),
2894   NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
2895   NEONMAP0(vaddhn_v),
2896   NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
2897   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
2898   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
2899   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
2900   NEONMAP1(vcage_v, aarch64_neon_facge, 0),
2901   NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
2902   NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
2903   NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
  // NOTE(review): the vcale*/vcalt* rows reuse the facge/facgt intrinsics of
  // vcage*/vcagt*; presumably the lowering swaps the operands -- confirm.
2904   NEONMAP1(vcale_v, aarch64_neon_facge, 0),
2905   NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
2906   NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
2907   NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
2908   NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
2909   NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
2910   NEONMAP1(vclz_v, ctlz, Add1ArgType),
2911   NEONMAP1(vclzq_v, ctlz, Add1ArgType),
2912   NEONMAP1(vcnt_v, ctpop, Add1ArgType),
2913   NEONMAP1(vcntq_v, ctpop, Add1ArgType),
2914   NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
2915   NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
2916   NEONMAP0(vcvt_f32_v),
2917   NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2918   NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2919   NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2920   NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2921   NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2922   NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2923   NEONMAP0(vcvtq_f32_v),
2924   NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2925   NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
2926   NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
2927   NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
2928   NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
2929   NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
2930   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
2931   NEONMAP0(vext_v),
2932   NEONMAP0(vextq_v),
2933   NEONMAP0(vfma_v),
2934   NEONMAP0(vfmaq_v),
2935   NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2936   NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
2937   NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2938   NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
2939   NEONMAP0(vmovl_v),
2940   NEONMAP0(vmovn_v),
2941   NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
2942   NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
2943   NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
2944   NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2945   NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
2946   NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
2947   NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
2948   NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
2949   NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2950   NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
2951   NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
2952   NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
2953   NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
2954   NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
2955   NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
2956   NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
2957   NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
2958   NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
2959   NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
2960   NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
2961   NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
2962   NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2963   NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
2964   NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
2965   NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2966   NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
2967   NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
2968   NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
2969   NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
2970   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2971   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
2972   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
2973   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2974   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
2975   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
2976   NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
2977   NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2978   NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
2979   NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2980   NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
2981   NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2982   NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
2983   NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2984   NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
2985   NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
2986   NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
2987   NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
2988   NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
2989   NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
2990   NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
2991   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
2992   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
2993   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
2994   NEONMAP0(vshl_n_v),
2995   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2996   NEONMAP0(vshll_n_v),
2997   NEONMAP0(vshlq_n_v),
2998   NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
2999   NEONMAP0(vshr_n_v),
3000   NEONMAP0(vshrn_n_v),
3001   NEONMAP0(vshrq_n_v),
3002   NEONMAP0(vsubhn_v),
3003   NEONMAP0(vtst_v),
3004   NEONMAP0(vtstq_v),
3005 };
3006 
/// Table of NeonIntrinsicInfo rows for the AArch64 scalar NEON builtins
/// (fixed-type names such as vqaddb_s8 or vaddvq_f32), built with NEONMAP1:
/// (builtin, LLVM intrinsic, modifier flags).
///
/// The flags describe which types overload the intrinsic. AddRetType,
/// Add1ArgType, Use64BitVectors and Use128BitVectors are tested directly by
/// CodeGenFunction::LookupNeonLLVMIntrinsic().
/// NOTE(review): Vectorize1ArgType and VectorRet are defined outside this
/// excerpt; presumably they are combinations of the Vectorize*/Add* bits that
/// function tests -- confirm against the flag enum.
///
/// The vcale*/vcalt* rows map to the same facge/facgt intrinsics as
/// vcage*/vcagt*; EmitCommonNeonSISDBuiltinExpr() lists those builtins in the
/// case group commented "Only one direction of comparisons actually exist".
///
/// Keep the rows sorted. findNeonIntrinsicInMap() locates a row with
/// std::lower_bound and, in builds without NDEBUG, asserts std::is_sorted on
/// the table it is given.
/// NOTE(review): presumably this table is searched through that function and
/// the sort key is the builtin ID, which is why rows appear alphabetically by
/// builtin name -- confirm before inserting a row.
3007 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3008   NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3009   NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3010   NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3011   NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3012   NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3013   NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3014   NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3015   NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3016   NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3017   NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3018   NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3019   NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3020   NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3021   NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3022   NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3023   NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3024   NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3025   NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3026   NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3027   NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3028   NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3029   NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3030   NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3031   NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3032   NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3033   NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3034   NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3035   NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3036   NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3037   NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3038   NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3039   NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3040   NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3041   NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3042   NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3043   NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3044   NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3045   NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3046   NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3047   NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3048   NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3049   NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3050   NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3051   NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3052   NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3053   NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3054   NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3055   NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3056   NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3057   NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3058   NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3059   NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3060   NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3061   NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3062   NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3063   NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3064   NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3065   NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3066   NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3067   NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3068   NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3069   NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3070   NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3071   NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3072   NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3073   NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3074   NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3075   NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3076   NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3077   NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3078   NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3079   NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
  // NOTE(review): the signed vpaddd_s64 row uses the unsigned uaddv intrinsic,
  // same as vpaddd_u64. Presumably deliberate, since a wrapping integer sum
  // does not depend on signedness -- confirm.
3080   NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3081   NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3082   NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3083   NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3084   NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3085   NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3086   NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3087   NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3088   NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3089   NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3090   NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3091   NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3092   NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3093   NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3094   NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3095   NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3096   NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3097   NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3098   NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3099   NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3100   NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3101   NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3102   NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3103   NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3104   NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3105   NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3106   NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3107   NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3108   NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3109   NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3110   NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3111   NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3112   NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3113   NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3114   NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3115   NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3116   NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3117   NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3118   NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3119   NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3120   NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3121   NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3122   NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3123   NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3124   NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3125   NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3126   NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3127   NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3128   NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3129   NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3130   NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3131   NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3132   NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3133   NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3134   NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3135   NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3136   NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3137   NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3138   NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3139   NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3140   NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3141   NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3142   NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3143   NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3144   NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3145   NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3146   NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3147   NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3148   NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3149   NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3150   NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3151   NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3152   NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3153   NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3154   NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3155   NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3156   NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3157   NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3158   NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3159   NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3160   NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3161   NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3162   NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3163   NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3164   NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3165   NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3166   NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3167   NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3168   NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3169   NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3170   NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3171   NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3172   NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3173   NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3174   NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3175   NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3176   NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3177   NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3178   NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3179   NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3180   NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3181   NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3182   NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3183   NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3184   NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3185   NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3186   NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3187   NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3188   NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3189   NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3190   NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3191   NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3192   NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3193   NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3194   NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3195   NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3196   NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3197   NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3198   NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3199   NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3200 };
3201 
// The NEONMAP* row-building macros are no longer needed once the intrinsic
// tables above have been defined, so retire them here.
3202 #undef NEONMAP0
3203 #undef NEONMAP1
3204 #undef NEONMAP2
3205 
// One "sortedness already checked" flag per intrinsic table, all starting
// false. findNeonIntrinsicInMap() takes such a flag by reference: in builds
// without NDEBUG it asserts std::is_sorted on the table while the flag is
// false, then sets it to true so the check is not repeated.
// NOTE(review): presumably each flag is passed together with the table of the
// matching name -- confirm at the call sites, which are outside this excerpt.
3206 static bool NEONSIMDIntrinsicsProvenSorted = false;
3207 
3208 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3209 static bool AArch64SISDIntrinsicsProvenSorted = false;
3210 
3211 
3212 static const NeonIntrinsicInfo *
findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,unsigned BuiltinID,bool & MapProvenSorted)3213 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3214                        unsigned BuiltinID, bool &MapProvenSorted) {
3215 
3216 #ifndef NDEBUG
3217   if (!MapProvenSorted) {
3218     assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3219     MapProvenSorted = true;
3220   }
3221 #endif
3222 
3223   const NeonIntrinsicInfo *Builtin =
3224       std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3225 
3226   if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3227     return Builtin;
3228 
3229   return nullptr;
3230 }
3231 
LookupNeonLLVMIntrinsic(unsigned IntrinsicID,unsigned Modifier,llvm::Type * ArgType,const CallExpr * E)3232 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3233                                                    unsigned Modifier,
3234                                                    llvm::Type *ArgType,
3235                                                    const CallExpr *E) {
3236   int VectorSize = 0;
3237   if (Modifier & Use64BitVectors)
3238     VectorSize = 64;
3239   else if (Modifier & Use128BitVectors)
3240     VectorSize = 128;
3241 
3242   // Return type.
3243   SmallVector<llvm::Type *, 3> Tys;
3244   if (Modifier & AddRetType) {
3245     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3246     if (Modifier & VectorizeRetType)
3247       Ty = llvm::VectorType::get(
3248           Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3249 
3250     Tys.push_back(Ty);
3251   }
3252 
3253   // Arguments.
3254   if (Modifier & VectorizeArgTypes) {
3255     int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3256     ArgType = llvm::VectorType::get(ArgType, Elts);
3257   }
3258 
3259   if (Modifier & (Add1ArgType | Add2ArgTypes))
3260     Tys.push_back(ArgType);
3261 
3262   if (Modifier & Add2ArgTypes)
3263     Tys.push_back(ArgType);
3264 
3265   if (Modifier & InventFloatType)
3266     Tys.push_back(FloatTy);
3267 
3268   return CGM.getIntrinsic(IntrinsicID, Tys);
3269 }
3270 
EmitCommonNeonSISDBuiltinExpr(CodeGenFunction & CGF,const NeonIntrinsicInfo & SISDInfo,SmallVectorImpl<Value * > & Ops,const CallExpr * E)3271 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3272                                             const NeonIntrinsicInfo &SISDInfo,
3273                                             SmallVectorImpl<Value *> &Ops,
3274                                             const CallExpr *E) {
3275   unsigned BuiltinID = SISDInfo.BuiltinID;
3276   unsigned int Int = SISDInfo.LLVMIntrinsic;
3277   unsigned Modifier = SISDInfo.TypeModifier;
3278   const char *s = SISDInfo.NameHint;
3279 
3280   switch (BuiltinID) {
3281   case NEON::BI__builtin_neon_vcled_s64:
3282   case NEON::BI__builtin_neon_vcled_u64:
3283   case NEON::BI__builtin_neon_vcles_f32:
3284   case NEON::BI__builtin_neon_vcled_f64:
3285   case NEON::BI__builtin_neon_vcltd_s64:
3286   case NEON::BI__builtin_neon_vcltd_u64:
3287   case NEON::BI__builtin_neon_vclts_f32:
3288   case NEON::BI__builtin_neon_vcltd_f64:
3289   case NEON::BI__builtin_neon_vcales_f32:
3290   case NEON::BI__builtin_neon_vcaled_f64:
3291   case NEON::BI__builtin_neon_vcalts_f32:
3292   case NEON::BI__builtin_neon_vcaltd_f64:
3293     // Only one direction of comparisons actually exist, cmle is actually a cmge
3294     // with swapped operands. The table gives us the right intrinsic but we
3295     // still need to do the swap.
3296     std::swap(Ops[0], Ops[1]);
3297     break;
3298   }
3299 
3300   assert(Int && "Generic code assumes a valid intrinsic");
3301 
3302   // Determine the type(s) of this overloaded AArch64 intrinsic.
3303   const Expr *Arg = E->getArg(0);
3304   llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3305   Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3306 
3307   int j = 0;
3308   ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3309   for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3310        ai != ae; ++ai, ++j) {
3311     llvm::Type *ArgTy = ai->getType();
3312     if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3313              ArgTy->getPrimitiveSizeInBits())
3314       continue;
3315 
3316     assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3317     // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3318     // it before inserting.
3319     Ops[j] =
3320         CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3321     Ops[j] =
3322         CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3323   }
3324 
3325   Value *Result = CGF.EmitNeonCall(F, Ops, s);
3326   llvm::Type *ResultType = CGF.ConvertType(E->getType());
3327   if (ResultType->getPrimitiveSizeInBits() <
3328       Result->getType()->getPrimitiveSizeInBits())
3329     return CGF.Builder.CreateExtractElement(Result, C0);
3330 
3331   return CGF.Builder.CreateBitCast(Result, ResultType, s);
3332 }
3333 
/// Emit IR for an overloaded NEON builtin.
///
/// The last argument of the call \p E must be an integer constant encoding
/// the NEON vector type (NeonTypeFlags); if it is not a constant, or no LLVM
/// vector type can be derived from it, nullptr is returned.
///
/// Builtins that need bespoke lowering are handled in the switch below and
/// return directly. Everything else breaks out of the switch and takes the
/// generic path at the bottom: the intrinsic is looked up through
/// LookupNeonLLVMIntrinsic using \p Modifier, called with \p Ops, and the
/// result is bitcast to the builtin's result type.
///
/// \param LLVMIntrinsic    primary intrinsic ID for this builtin.
/// \param AltLLVMIntrinsic alternative intrinsic ID; which of the two is used
///                         depends on the builtin (see the individual cases).
/// \param NameHint         name given to the emitted call in most cases.
/// \param Ops              already-emitted operands; modified in place.
/// \param PtrOp0, PtrOp1   addresses whose alignment is passed to the
///                         load/store intrinsics (PtrOp0 is also loaded from
///                         directly for vld1_dup).
Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
    unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
    const char *NameHint, unsigned Modifier, const CallExpr *E,
    SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
  // Get the last argument, which specifies the vector type.
  llvm::APSInt NeonTypeConst;
  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
    return nullptr;

  // Determine the type of this overloaded NEON intrinsic.
  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
  bool Usgn = Type.isUnsigned();
  bool Quad = Type.isQuad();

  llvm::VectorType *VTy = GetNeonType(this, Type);
  llvm::Type *Ty = VTy;
  if (!Ty)
    return nullptr;

  // Materialize an address's alignment as an i32 constant operand.
  auto getAlignmentValue32 = [&](Address addr) -> Value* {
    return Builder.getInt32(addr.getAlignment().getQuantity());
  };

  // Default intrinsic choice: with UnsignedAlts, LLVMIntrinsic is the unsigned
  // variant and AltLLVMIntrinsic is used for signed element types.
  unsigned Int = LLVMIntrinsic;
  if ((Modifier & UnsignedAlts) && !Usgn)
    Int = AltLLVMIntrinsic;

  switch (BuiltinID) {
  default: break;
  case NEON::BI__builtin_neon_vabs_v:
  case NEON::BI__builtin_neon_vabsq_v:
    if (VTy->getElementType()->isFloatingPointTy())
      return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
  case NEON::BI__builtin_neon_vaddhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %sum = add <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");

    // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
  }
  case NEON::BI__builtin_neon_vcale_v:
  case NEON::BI__builtin_neon_vcaleq_v:
  case NEON::BI__builtin_neon_vcalt_v:
  case NEON::BI__builtin_neon_vcaltq_v:
    // The "less" forms are emitted as the "greater" forms with the operands
    // exchanged.
    std::swap(Ops[0], Ops[1]);
    // Intentional fall through to the vcage/vcagt emission below.
  case NEON::BI__builtin_neon_vcage_v:
  case NEON::BI__builtin_neon_vcageq_v:
  case NEON::BI__builtin_neon_vcagt_v:
  case NEON::BI__builtin_neon_vcagtq_v: {
    // The intrinsic is overloaded on both the integer result vector and the
    // floating-point operand vector of the same lane width and count.
    llvm::Type *VecFlt = llvm::VectorType::get(
        VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
        VTy->getNumElements());
    llvm::Type *Tys[] = { VTy, VecFlt };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vclz_v:
  case NEON::BI__builtin_neon_vclzq_v:
    // We generate target-independent intrinsic, which needs a second argument
    // for whether or not clz of zero is undefined; on ARM it isn't.
    Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
    break;
  case NEON::BI__builtin_neon_vcvt_f32_v:
  case NEON::BI__builtin_neon_vcvtq_f32_v:
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
    return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
                : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
  case NEON::BI__builtin_neon_vcvt_n_f32_v:
  case NEON::BI__builtin_neon_vcvt_n_f64_v:
  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
    llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
    // Chosen on signedness alone here, independent of the UnsignedAlts
    // modifier consulted for the default above.
    Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
    Function *F = CGM.getIntrinsic(Int, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_n_s32_v:
  case NEON::BI__builtin_neon_vcvt_n_u32_v:
  case NEON::BI__builtin_neon_vcvt_n_s64_v:
  case NEON::BI__builtin_neon_vcvt_n_u64_v:
  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    return EmitNeonCall(F, Ops, "vcvt_n");
  }
  case NEON::BI__builtin_neon_vcvt_s32_v:
  case NEON::BI__builtin_neon_vcvt_u32_v:
  case NEON::BI__builtin_neon_vcvt_s64_v:
  case NEON::BI__builtin_neon_vcvt_u64_v:
  case NEON::BI__builtin_neon_vcvtq_s32_v:
  case NEON::BI__builtin_neon_vcvtq_u32_v:
  case NEON::BI__builtin_neon_vcvtq_s64_v:
  case NEON::BI__builtin_neon_vcvtq_u64_v: {
    // Plain float-to-int conversions are emitted as IR instructions rather
    // than intrinsic calls.
    Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
    return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
                : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
  }
  case NEON::BI__builtin_neon_vcvta_s32_v:
  case NEON::BI__builtin_neon_vcvta_s64_v:
  case NEON::BI__builtin_neon_vcvta_u32_v:
  case NEON::BI__builtin_neon_vcvta_u64_v:
  case NEON::BI__builtin_neon_vcvtaq_s32_v:
  case NEON::BI__builtin_neon_vcvtaq_s64_v:
  case NEON::BI__builtin_neon_vcvtaq_u32_v:
  case NEON::BI__builtin_neon_vcvtaq_u64_v:
  case NEON::BI__builtin_neon_vcvtn_s32_v:
  case NEON::BI__builtin_neon_vcvtn_s64_v:
  case NEON::BI__builtin_neon_vcvtn_u32_v:
  case NEON::BI__builtin_neon_vcvtn_u64_v:
  case NEON::BI__builtin_neon_vcvtnq_s32_v:
  case NEON::BI__builtin_neon_vcvtnq_s64_v:
  case NEON::BI__builtin_neon_vcvtnq_u32_v:
  case NEON::BI__builtin_neon_vcvtnq_u64_v:
  case NEON::BI__builtin_neon_vcvtp_s32_v:
  case NEON::BI__builtin_neon_vcvtp_s64_v:
  case NEON::BI__builtin_neon_vcvtp_u32_v:
  case NEON::BI__builtin_neon_vcvtp_u64_v:
  case NEON::BI__builtin_neon_vcvtpq_s32_v:
  case NEON::BI__builtin_neon_vcvtpq_s64_v:
  case NEON::BI__builtin_neon_vcvtpq_u32_v:
  case NEON::BI__builtin_neon_vcvtpq_u64_v:
  case NEON::BI__builtin_neon_vcvtm_s32_v:
  case NEON::BI__builtin_neon_vcvtm_s64_v:
  case NEON::BI__builtin_neon_vcvtm_u32_v:
  case NEON::BI__builtin_neon_vcvtm_u64_v:
  case NEON::BI__builtin_neon_vcvtmq_s32_v:
  case NEON::BI__builtin_neon_vcvtmq_s64_v:
  case NEON::BI__builtin_neon_vcvtmq_u32_v:
  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
    llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vext_v:
  case NEON::BI__builtin_neon_vextq_v: {
    // Emitted as a shuffle selecting NumElements consecutive lanes, starting
    // at the constant offset CV, from the concatenation of the two inputs.
    int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
    SmallVector<uint32_t, 16> Indices;
    for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
      Indices.push_back(i+CV);

    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
  }
  case NEON::BI__builtin_neon_vfma_v:
  case NEON::BI__builtin_neon_vfmaq_v: {
    Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);

    // NEON intrinsic puts accumulator first, unlike the LLVM fma.
    return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
  }
  case NEON::BI__builtin_neon_vld1_v:
  case NEON::BI__builtin_neon_vld1q_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
  }
  case NEON::BI__builtin_neon_vld2_v:
  case NEON::BI__builtin_neon_vld2q_v:
  case NEON::BI__builtin_neon_vld3_v:
  case NEON::BI__builtin_neon_vld3q_v:
  case NEON::BI__builtin_neon_vld4_v:
  case NEON::BI__builtin_neon_vld4q_v: {
    // Call the intrinsic on the source pointer (Ops[1]) and store its result
    // through Ops[0], cast to a pointer to the result type.
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    Value *Align = getAlignmentValue32(PtrOp1);
    Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vld1_dup_v:
  case NEON::BI__builtin_neon_vld1q_dup_v: {
    // Load one element, insert it into lane 0 of an undef vector, then splat.
    Value *V = UndefValue::get(Ty);
    Ty = llvm::PointerType::getUnqual(VTy->getElementType());
    PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
    LoadInst *Ld = Builder.CreateLoad(PtrOp0);
    llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
    Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
    return EmitNeonSplat(Ops[0], CI);
  }
  case NEON::BI__builtin_neon_vld2_lane_v:
  case NEON::BI__builtin_neon_vld2q_lane_v:
  case NEON::BI__builtin_neon_vld3_lane_v:
  case NEON::BI__builtin_neon_vld3q_lane_v:
  case NEON::BI__builtin_neon_vld4_lane_v:
  case NEON::BI__builtin_neon_vld4q_lane_v: {
    llvm::Type *Tys[] = {Ty, Int8PtrTy};
    Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
    // Cast every operand from index 2 up to, but not including, the last one
    // to the vector type; the last operand is left as is.
    for (unsigned I = 2; I < Ops.size() - 1; ++I)
      Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
    Ops.push_back(getAlignmentValue32(PtrOp1));
    // Ops[0] is the destination for the result, so it is not passed on.
    Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
    Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
  }
  case NEON::BI__builtin_neon_vmovl_v: {
    llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
    if (Usgn)
      return Builder.CreateZExt(Ops[0], Ty, "vmovl");
    return Builder.CreateSExt(Ops[0], Ty, "vmovl");
  }
  case NEON::BI__builtin_neon_vmovn_v: {
    llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
    return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
  }
  case NEON::BI__builtin_neon_vmull_v:
    // FIXME: the integer vmull operations could be emitted in terms of pure
    // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
    // hoisting the exts outside loops. Until global ISel comes along that can
    // see through such movement this leads to bad CodeGen. So we need an
    // intrinsic for now.
    Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
    Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
  case NEON::BI__builtin_neon_vpadal_v:
  case NEON::BI__builtin_neon_vpadalq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy =
      llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vpaddl_v:
  case NEON::BI__builtin_neon_vpaddlq_v: {
    // The source operand type has twice as many elements of half the size.
    unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
    llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
    llvm::Type *NarrowTy =
      llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
    llvm::Type *Tys[2] = { Ty, NarrowTy };
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
  }
  case NEON::BI__builtin_neon_vqdmlal_v:
  case NEON::BI__builtin_neon_vqdmlsl_v: {
    // Two-step lowering: first call LLVMIntrinsic on every operand after the
    // first, then combine Ops[0] with that result using AltLLVMIntrinsic.
    SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
    Ops[1] =
        EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
    Ops.resize(2);
    return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
  }
  case NEON::BI__builtin_neon_vqshl_n_v:
  case NEON::BI__builtin_neon_vqshlq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
                        1, false);
  case NEON::BI__builtin_neon_vqshlu_n_v:
  case NEON::BI__builtin_neon_vqshluq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
                        1, false);
  case NEON::BI__builtin_neon_vrecpe_v:
  case NEON::BI__builtin_neon_vrecpeq_v:
  case NEON::BI__builtin_neon_vrsqrte_v:
  case NEON::BI__builtin_neon_vrsqrteq_v:
    // Floating-point vectors use the primary intrinsic, others the alternate.
    Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);

  case NEON::BI__builtin_neon_vrshr_n_v:
  case NEON::BI__builtin_neon_vrshrq_n_v:
    return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                        1, true);
  case NEON::BI__builtin_neon_vshl_n_v:
  case NEON::BI__builtin_neon_vshlq_n_v:
    Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
    return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
                             "vshl_n");
  case NEON::BI__builtin_neon_vshll_n_v: {
    // Widen the narrow source (zero- or sign-extending) and then shift left.
    llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    if (Usgn)
      Ops[0] = Builder.CreateZExt(Ops[0], VTy);
    else
      Ops[0] = Builder.CreateSExt(Ops[0], VTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
    return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
  }
  case NEON::BI__builtin_neon_vshrn_n_v: {
    // Shift the wide source right (logical or arithmetic) and then truncate.
    llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
    if (Usgn)
      Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
    else
      Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
    return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
  }
  case NEON::BI__builtin_neon_vshr_n_v:
  case NEON::BI__builtin_neon_vshrq_n_v:
    return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
  case NEON::BI__builtin_neon_vst1_v:
  case NEON::BI__builtin_neon_vst1q_v:
  case NEON::BI__builtin_neon_vst2_v:
  case NEON::BI__builtin_neon_vst2q_v:
  case NEON::BI__builtin_neon_vst3_v:
  case NEON::BI__builtin_neon_vst3q_v:
  case NEON::BI__builtin_neon_vst4_v:
  case NEON::BI__builtin_neon_vst4q_v:
  case NEON::BI__builtin_neon_vst2_lane_v:
  case NEON::BI__builtin_neon_vst2q_lane_v:
  case NEON::BI__builtin_neon_vst3_lane_v:
  case NEON::BI__builtin_neon_vst3q_lane_v:
  case NEON::BI__builtin_neon_vst4_lane_v:
  case NEON::BI__builtin_neon_vst4q_lane_v: {
    // All stores share one shape: append the alignment and call the intrinsic.
    llvm::Type *Tys[] = {Int8PtrTy, Ty};
    Ops.push_back(getAlignmentValue32(PtrOp0));
    return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
  }
  case NEON::BI__builtin_neon_vsubhn_v: {
    llvm::VectorType *SrcTy =
        llvm::VectorType::getExtendedElementVectorType(VTy);

    // %diff = sub <4 x i32> %lhs, %rhs
    Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
    Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
    Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");

    // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
    Constant *ShiftAmt =
        ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
    Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");

    // %res = trunc <4 x i32> %high to <4 x i16>
    return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
  }
  case NEON::BI__builtin_neon_vtrn_v:
  case NEON::BI__builtin_neon_vtrnq_v: {
    // Ops[0] is the destination pointer; two shuffled vectors are stored at
    // element offsets 0 and 1 from it.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // Pair lane i+vi of the first input with lane i+vi of the second.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back(i+vi);
        Indices.push_back(i+e+vi);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    // The value returned is the second store instruction.
    return SV;
  }
  case NEON::BI__builtin_neon_vtst_v:
  case NEON::BI__builtin_neon_vtstq_v: {
    // (a & b) != 0, sign-extended so each true lane becomes all ones.
    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
    Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
                                ConstantAggregateZero::get(Ty));
    return Builder.CreateSExt(Ops[0], Ty, "vtst");
  }
  case NEON::BI__builtin_neon_vuzp_v:
  case NEON::BI__builtin_neon_vuzpq_v: {
    // Same store-two-results structure as vtrn above in this switch, with a
    // different shuffle mask.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // vi == 0 picks the even lanes of the concatenated inputs, vi == 1 the
      // odd lanes.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
        Indices.push_back(2*i+vi);

      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  case NEON::BI__builtin_neon_vzip_v:
  case NEON::BI__builtin_neon_vzipq_v: {
    // Same store-two-results structure as vtrn above in this switch, with a
    // different shuffle mask.
    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
    Value *SV = nullptr;

    for (unsigned vi = 0; vi != 2; ++vi) {
      // vi == 0 interleaves the low halves of the two inputs, vi == 1 the
      // high halves.
      SmallVector<uint32_t, 16> Indices;
      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
        Indices.push_back((i + vi*e) >> 1);
        Indices.push_back(((i + vi*e) >> 1)+e);
      }
      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
      SV = Builder.CreateDefaultAlignedStore(SV, Addr);
    }
    return SV;
  }
  }

  // Generic path for every builtin that broke out of the switch above.
  assert(Int && "Expected valid intrinsic number");

  // Determine the type(s) of this overloaded AArch64 intrinsic.
  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);

  Value *Result = EmitNeonCall(F, Ops, NameHint);
  llvm::Type *ResultType = ConvertType(E->getType());
  // AArch64 intrinsic one-element vector type cast to
  // scalar type expected by the builtin
  return Builder.CreateBitCast(Result, ResultType, NameHint);
}
3759 
EmitAArch64CompareBuiltinExpr(Value * Op,llvm::Type * Ty,const CmpInst::Predicate Fp,const CmpInst::Predicate Ip,const Twine & Name)3760 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
3761     Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
3762     const CmpInst::Predicate Ip, const Twine &Name) {
3763   llvm::Type *OTy = Op->getType();
3764 
3765   // FIXME: this is utterly horrific. We should not be looking at previous
3766   // codegen context to find out what needs doing. Unfortunately TableGen
3767   // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
3768   // (etc).
3769   if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
3770     OTy = BI->getOperand(0)->getType();
3771 
3772   Op = Builder.CreateBitCast(Op, OTy);
3773   if (OTy->getScalarType()->isFloatingPointTy()) {
3774     Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
3775   } else {
3776     Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
3777   }
3778   return Builder.CreateSExt(Op, Ty, Name);
3779 }
3780 
packTBLDVectorList(CodeGenFunction & CGF,ArrayRef<Value * > Ops,Value * ExtOp,Value * IndexOp,llvm::Type * ResTy,unsigned IntID,const char * Name)3781 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
3782                                  Value *ExtOp, Value *IndexOp,
3783                                  llvm::Type *ResTy, unsigned IntID,
3784                                  const char *Name) {
3785   SmallVector<Value *, 2> TblOps;
3786   if (ExtOp)
3787     TblOps.push_back(ExtOp);
3788 
3789   // Build a vector containing sequential number like (0, 1, 2, ..., 15)
3790   SmallVector<uint32_t, 16> Indices;
3791   llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
3792   for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
3793     Indices.push_back(2*i);
3794     Indices.push_back(2*i+1);
3795   }
3796 
3797   int PairPos = 0, End = Ops.size() - 1;
3798   while (PairPos < End) {
3799     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3800                                                      Ops[PairPos+1], Indices,
3801                                                      Name));
3802     PairPos += 2;
3803   }
3804 
3805   // If there's an odd number of 64-bit lookup table, fill the high 64-bit
3806   // of the 128-bit lookup table with zero.
3807   if (PairPos == End) {
3808     Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
3809     TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
3810                                                      ZeroTbl, Indices, Name));
3811   }
3812 
3813   Function *TblF;
3814   TblOps.push_back(IndexOp);
3815   TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
3816 
3817   return CGF.EmitNeonCall(TblF, TblOps, Name);
3818 }
3819 
GetValueForARMHint(unsigned BuiltinID)3820 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
3821   unsigned Value;
3822   switch (BuiltinID) {
3823   default:
3824     return nullptr;
3825   case ARM::BI__builtin_arm_nop:
3826     Value = 0;
3827     break;
3828   case ARM::BI__builtin_arm_yield:
3829   case ARM::BI__yield:
3830     Value = 1;
3831     break;
3832   case ARM::BI__builtin_arm_wfe:
3833   case ARM::BI__wfe:
3834     Value = 2;
3835     break;
3836   case ARM::BI__builtin_arm_wfi:
3837   case ARM::BI__wfi:
3838     Value = 3;
3839     break;
3840   case ARM::BI__builtin_arm_sev:
3841   case ARM::BI__sev:
3842     Value = 4;
3843     break;
3844   case ARM::BI__builtin_arm_sevl:
3845   case ARM::BI__sevl:
3846     Value = 5;
3847     break;
3848   }
3849 
3850   return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
3851                             llvm::ConstantInt::get(Int32Ty, Value));
3852 }
3853 
3854 // Generates the IR for the read/write special register builtin,
3855 // ValueType is the type of the value that is to be written or read,
3856 // RegisterType is the type of the register being written to or read from.
EmitSpecialRegisterBuiltin(CodeGenFunction & CGF,const CallExpr * E,llvm::Type * RegisterType,llvm::Type * ValueType,bool IsRead,StringRef SysReg="")3857 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
3858                                          const CallExpr *E,
3859                                          llvm::Type *RegisterType,
3860                                          llvm::Type *ValueType,
3861                                          bool IsRead,
3862                                          StringRef SysReg = "") {
3863   // write and register intrinsics only support 32 and 64 bit operations.
3864   assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
3865           && "Unsupported size for register.");
3866 
3867   CodeGen::CGBuilderTy &Builder = CGF.Builder;
3868   CodeGen::CodeGenModule &CGM = CGF.CGM;
3869   LLVMContext &Context = CGM.getLLVMContext();
3870 
3871   if (SysReg.empty()) {
3872     const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
3873     SysReg = cast<StringLiteral>(SysRegStrExpr)->getString();
3874   }
3875 
3876   llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
3877   llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
3878   llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
3879 
3880   llvm::Type *Types[] = { RegisterType };
3881 
3882   bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
3883   assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
3884             && "Can't fit 64-bit value in 32-bit register");
3885 
3886   if (IsRead) {
3887     llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
3888     llvm::Value *Call = Builder.CreateCall(F, Metadata);
3889 
3890     if (MixedTypes)
3891       // Read into 64 bit register and then truncate result to 32 bit.
3892       return Builder.CreateTrunc(Call, ValueType);
3893 
3894     if (ValueType->isPointerTy())
3895       // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
3896       return Builder.CreateIntToPtr(Call, ValueType);
3897 
3898     return Call;
3899   }
3900 
3901   llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
3902   llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
3903   if (MixedTypes) {
3904     // Extend 32 bit write value to 64 bit to pass to write.
3905     ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
3906     return Builder.CreateCall(F, { Metadata, ArgValue });
3907   }
3908 
3909   if (ValueType->isPointerTy()) {
3910     // Have VoidPtrTy ArgValue but want to return an i32/i64.
3911     ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
3912     return Builder.CreateCall(F, { Metadata, ArgValue });
3913   }
3914 
3915   return Builder.CreateCall(F, { Metadata, ArgValue });
3916 }
3917 
3918 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
3919 /// argument that specifies the vector type.
HasExtraNeonArgument(unsigned BuiltinID)3920 static bool HasExtraNeonArgument(unsigned BuiltinID) {
3921   switch (BuiltinID) {
3922   default: break;
3923   case NEON::BI__builtin_neon_vget_lane_i8:
3924   case NEON::BI__builtin_neon_vget_lane_i16:
3925   case NEON::BI__builtin_neon_vget_lane_i32:
3926   case NEON::BI__builtin_neon_vget_lane_i64:
3927   case NEON::BI__builtin_neon_vget_lane_f32:
3928   case NEON::BI__builtin_neon_vgetq_lane_i8:
3929   case NEON::BI__builtin_neon_vgetq_lane_i16:
3930   case NEON::BI__builtin_neon_vgetq_lane_i32:
3931   case NEON::BI__builtin_neon_vgetq_lane_i64:
3932   case NEON::BI__builtin_neon_vgetq_lane_f32:
3933   case NEON::BI__builtin_neon_vset_lane_i8:
3934   case NEON::BI__builtin_neon_vset_lane_i16:
3935   case NEON::BI__builtin_neon_vset_lane_i32:
3936   case NEON::BI__builtin_neon_vset_lane_i64:
3937   case NEON::BI__builtin_neon_vset_lane_f32:
3938   case NEON::BI__builtin_neon_vsetq_lane_i8:
3939   case NEON::BI__builtin_neon_vsetq_lane_i16:
3940   case NEON::BI__builtin_neon_vsetq_lane_i32:
3941   case NEON::BI__builtin_neon_vsetq_lane_i64:
3942   case NEON::BI__builtin_neon_vsetq_lane_f32:
3943   case NEON::BI__builtin_neon_vsha1h_u32:
3944   case NEON::BI__builtin_neon_vsha1cq_u32:
3945   case NEON::BI__builtin_neon_vsha1pq_u32:
3946   case NEON::BI__builtin_neon_vsha1mq_u32:
3947   case ARM::BI_MoveToCoprocessor:
3948   case ARM::BI_MoveToCoprocessor2:
3949     return false;
3950   }
3951   return true;
3952 }
3953 
/// Emit LLVM IR for a call to an ARM target builtin.
///
/// Scalar builtins (hints, __emit, dbg, prefetch, rbit, __clear_cache,
/// coprocessor register moves, exclusive loads/stores, clrex, CRC32 and
/// special-register accesses) are dispatched first through a chain of early
/// returns. Everything that reaches the operand-collection loop is treated
/// as a NEON (or vcvtr / _MoveToCoprocessor) builtin.
///
/// \param BuiltinID  ID of the builtin being called.
/// \param E          The call expression whose arguments are emitted.
/// \returns The value emitted for the call, or nullptr when the trailing
///          type argument is not an integer constant expression, when
///          GetNeonType yields no type, or when no case handles the builtin.
EmitARMBuiltinExpr(unsigned BuiltinID,const CallExpr * E)3954 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
3955                                            const CallExpr *E) {
       // If GetValueForARMHint produced a value for this builtin, that value
       // is the result.
3956   if (auto Hint = GetValueForARMHint(BuiltinID))
3957     return Hint;
3958
       // __emit: place the constant operand in the instruction stream through
       // a side-effecting inline-asm ".inst" directive (".inst.n" with the
       // value narrowed to 16 bits when the target arch is thumb).
3959   if (BuiltinID == ARM::BI__emit) {
3960     bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
3961     llvm::FunctionType *FTy =
3962         llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
3963
3964     APSInt Value;
3965     if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
3966       llvm_unreachable("Sema will ensure that the parameter is constant");
3967
3968     uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
3969
3970     llvm::InlineAsm *Emit =
3971         IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
3972                                  /*SideEffects=*/true)
3973                 : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
3974                                  /*SideEffects=*/true);
3975
3976     return Builder.CreateCall(Emit);
3977   }
3978
3979   if (BuiltinID == ARM::BI__builtin_arm_dbg) {
3980     Value *Option = EmitScalarExpr(E->getArg(0));
3981     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
3982   }
3983
       // __builtin_arm_prefetch is lowered to the generic prefetch intrinsic
       // with a fixed locality operand.
3984   if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
3985     Value *Address = EmitScalarExpr(E->getArg(0));
3986     Value *RW      = EmitScalarExpr(E->getArg(1));
3987     Value *IsData  = EmitScalarExpr(E->getArg(2));
3988
3989     // Locality is not supported on ARM target
3990     Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
3991
3992     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
3993     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
3994   }
3995
3996   if (BuiltinID == ARM::BI__builtin_arm_rbit) {
3997     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_rbit),
3998                                                EmitScalarExpr(E->getArg(0)),
3999                               "rbit");
4000   }
4001
       // __clear_cache becomes a nounwind call to a runtime function that has
       // the name and type of the directly called declaration.
4002   if (BuiltinID == ARM::BI__clear_cache) {
4003     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4004     const FunctionDecl *FD = E->getDirectCallee();
4005     Value *Ops[2];
4006     for (unsigned i = 0; i < 2; i++)
4007       Ops[i] = EmitScalarExpr(E->getArg(i));
4008     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4009     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4010     StringRef Name = FD->getName();
4011     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4012   }
4013
4014   if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4015       BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4016     Function *F;
4017
4018     switch (BuiltinID) {
4019     default: llvm_unreachable("unexpected builtin");
4020     case ARM::BI__builtin_arm_mcrr:
4021       F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4022       break;
4023     case ARM::BI__builtin_arm_mcrr2:
4024       F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4025       break;
4026     }
4027
4028     // MCRR{2} instruction has 5 operands but
4029     // the intrinsic has 4 because Rt and Rt2
4030     // are represented as a single unsigned 64
4031     // bit integer in the intrinsic definition
4032     // but internally it's represented as 2 32
4033     // bit integers.
4034
4035     Value *Coproc = EmitScalarExpr(E->getArg(0));
4036     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4037     Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4038     Value *CRm = EmitScalarExpr(E->getArg(3));
4039
         // Rt is the low 32 bits of the combined value, Rt2 the high 32 bits.
4040     Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4041     Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4042     Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4043     Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4044
4045     return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4046   }
4047
4048   if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4049       BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4050     Function *F;
4051
4052     switch (BuiltinID) {
4053     default: llvm_unreachable("unexpected builtin");
4054     case ARM::BI__builtin_arm_mrrc:
4055       F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4056       break;
4057     case ARM::BI__builtin_arm_mrrc2:
4058       F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4059       break;
4060     }
4061
4062     Value *Coproc = EmitScalarExpr(E->getArg(0));
4063     Value *Opc1 = EmitScalarExpr(E->getArg(1));
4064     Value *CRm  = EmitScalarExpr(E->getArg(2));
4065     Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4066
4067     // Returns an unsigned 64 bit integer, represented
4068     // as two 32 bit integers.
4069
         // Element 1 of the call result becomes the high 32 bits and element 0
         // the low 32 bits of the combined 64-bit value.
4070     Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4071     Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4072     Rt = Builder.CreateZExt(Rt, Int64Ty);
4073     Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4074
4075     Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4076     RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4077     RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4078
4079     return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4080   }
4081
       // 64-bit exclusive loads: ldrexd / __ldrexd, or ldrex / ldaex whose
       // result type is 64 bits wide.
4082   if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4083       ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4084         BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4085        getContext().getTypeSize(E->getType()) == 64) ||
4086       BuiltinID == ARM::BI__ldrexd) {
4087     Function *F;
4088
4089     switch (BuiltinID) {
4090     default: llvm_unreachable("unexpected builtin");
4091     case ARM::BI__builtin_arm_ldaex:
4092       F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4093       break;
4094     case ARM::BI__builtin_arm_ldrexd:
4095     case ARM::BI__builtin_arm_ldrex:
4096     case ARM::BI__ldrexd:
4097       F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4098       break;
4099     }
4100
4101     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4102     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4103                                     "ldrexd");
4104
         // Recombine the two result elements: element 1 is shifted into the
         // high 32 bits, element 0 supplies the low 32 bits.
4105     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4106     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4107     Val0 = Builder.CreateZExt(Val0, Int64Ty);
4108     Val1 = Builder.CreateZExt(Val1, Int64Ty);
4109
4110     Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4111     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4112     Val = Builder.CreateOr(Val, Val1);
4113     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4114   }
4115
       // Remaining ldrex / ldaex calls (the 64-bit case returned above) use
       // the intrinsic overloaded on the integer pointer type of the address.
4116   if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4117       BuiltinID == ARM::BI__builtin_arm_ldaex) {
4118     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4119
4120     QualType Ty = E->getType();
4121     llvm::Type *RealResTy = ConvertType(Ty);
4122     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4123                                                   getContext().getTypeSize(Ty));
4124     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4125
4126     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4127                                        ? Intrinsic::arm_ldaex
4128                                        : Intrinsic::arm_ldrex,
4129                                    LoadAddr->getType());
4130     Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4131
         // Convert the intrinsic result back to the builtin's result type.
4132     if (RealResTy->isPointerTy())
4133       return Builder.CreateIntToPtr(Val, RealResTy);
4134     else {
4135       Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4136       return Builder.CreateBitCast(Val, RealResTy);
4137     }
4138   }
4139
       // 64-bit exclusive stores: strexd, or strex / stlex whose value
       // argument is 64 bits wide.
4140   if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4141       ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4142         BuiltinID == ARM::BI__builtin_arm_strex) &&
4143        getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4144     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4145                                        ? Intrinsic::arm_stlexd
4146                                        : Intrinsic::arm_strexd);
4147     llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, nullptr);
4148
         // Store the value to a temporary and reload it as a pair of i32s so
         // the two halves can be passed to the intrinsic separately.
4149     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4150     Value *Val = EmitScalarExpr(E->getArg(0));
4151     Builder.CreateStore(Val, Tmp);
4152
4153     Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4154     Val = Builder.CreateLoad(LdPtr);
4155
4156     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4157     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4158     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4159     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4160   }
4161
       // Remaining strex / stlex calls: the value is converted to i32 and the
       // intrinsic is overloaded on the integer pointer type of the address.
4162   if (BuiltinID == ARM::BI__builtin_arm_strex ||
4163       BuiltinID == ARM::BI__builtin_arm_stlex) {
4164     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4165     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4166
4167     QualType Ty = E->getArg(0)->getType();
4168     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4169                                                  getContext().getTypeSize(Ty));
4170     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4171
4172     if (StoreVal->getType()->isPointerTy())
4173       StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4174     else {
4175       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4176       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4177     }
4178
4179     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4180                                        ? Intrinsic::arm_stlex
4181                                        : Intrinsic::arm_strex,
4182                                    StoreAddr->getType());
4183     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4184   }
4185
4186   if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4187     Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4188     return Builder.CreateCall(F);
4189   }
4190
4191   // CRC32
       // Map the builtin to its intrinsic; the 64-bit "d" variants map to the
       // corresponding 32-bit "w" intrinsic and are expanded below.
4192   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4193   switch (BuiltinID) {
4194   case ARM::BI__builtin_arm_crc32b:
4195     CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4196   case ARM::BI__builtin_arm_crc32cb:
4197     CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4198   case ARM::BI__builtin_arm_crc32h:
4199     CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4200   case ARM::BI__builtin_arm_crc32ch:
4201     CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4202   case ARM::BI__builtin_arm_crc32w:
4203   case ARM::BI__builtin_arm_crc32d:
4204     CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4205   case ARM::BI__builtin_arm_crc32cw:
4206   case ARM::BI__builtin_arm_crc32cd:
4207     CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4208   }
4209
4210   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4211     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4212     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4213
4214     // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4215     // intrinsics, hence we need different codegen for these cases.
4216     if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4217         BuiltinID == ARM::BI__builtin_arm_crc32cd) {
           // The low 32 bits of Arg1 feed the first call; the high 32 bits
           // feed the second call together with the first call's result.
4218       Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4219       Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4220       Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4221       Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4222
4223       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4224       Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4225       return Builder.CreateCall(F, {Res, Arg1b});
4226     } else {
4227       Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4228
4229       Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4230       return Builder.CreateCall(F, {Arg0, Arg1});
4231     }
4232   }
4233
       // Special-register reads and writes are delegated to
       // EmitSpecialRegisterBuiltin with the types chosen below.
4234   if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4235       BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4236       BuiltinID == ARM::BI__builtin_arm_rsrp ||
4237       BuiltinID == ARM::BI__builtin_arm_wsr ||
4238       BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4239       BuiltinID == ARM::BI__builtin_arm_wsrp) {
4240
4241     bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4242                   BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4243                   BuiltinID == ARM::BI__builtin_arm_rsrp;
4244
4245     bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4246                             BuiltinID == ARM::BI__builtin_arm_wsrp;
4247
4248     bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4249                    BuiltinID == ARM::BI__builtin_arm_wsr64;
4250
         // Pointer variants: void* value with an i32 register type. 64-bit
         // variants: i64 for both. Otherwise: i32 for both.
4251     llvm::Type *ValueType;
4252     llvm::Type *RegisterType;
4253     if (IsPointerBuiltin) {
4254       ValueType = VoidPtrTy;
4255       RegisterType = Int32Ty;
4256     } else if (Is64Bit) {
4257       ValueType = RegisterType = Int64Ty;
4258     } else {
4259       ValueType = RegisterType = Int32Ty;
4260     }
4261
4262     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4263   }
4264
4265   // Find out if any arguments are required to be integer constant
4266   // expressions.
4267   unsigned ICEArguments = 0;
4268   ASTContext::GetBuiltinTypeError Error;
4269   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4270   assert(Error == ASTContext::GE_None && "Should not codegen an error");
4271
       // Helper: the alignment quantity of an Address as an i32 constant.
4272   auto getAlignmentValue32 = [&](Address addr) -> Value* {
4273     return Builder.getInt32(addr.getAlignment().getQuantity());
4274   };
4275
       // Collect the call operands into Ops. When HasExtraArg is set, the
       // final argument is not emitted here; it is read separately below.
4276   Address PtrOp0 = Address::invalid();
4277   Address PtrOp1 = Address::invalid();
4278   SmallVector<Value*, 4> Ops;
4279   bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4280   unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4281   for (unsigned i = 0, e = NumArgs; i != e; i++) {
4282     if (i == 0) {
4283       switch (BuiltinID) {
4284       case NEON::BI__builtin_neon_vld1_v:
4285       case NEON::BI__builtin_neon_vld1q_v:
4286       case NEON::BI__builtin_neon_vld1q_lane_v:
4287       case NEON::BI__builtin_neon_vld1_lane_v:
4288       case NEON::BI__builtin_neon_vld1_dup_v:
4289       case NEON::BI__builtin_neon_vld1q_dup_v:
4290       case NEON::BI__builtin_neon_vst1_v:
4291       case NEON::BI__builtin_neon_vst1q_v:
4292       case NEON::BI__builtin_neon_vst1q_lane_v:
4293       case NEON::BI__builtin_neon_vst1_lane_v:
4294       case NEON::BI__builtin_neon_vst2_v:
4295       case NEON::BI__builtin_neon_vst2q_v:
4296       case NEON::BI__builtin_neon_vst2_lane_v:
4297       case NEON::BI__builtin_neon_vst2q_lane_v:
4298       case NEON::BI__builtin_neon_vst3_v:
4299       case NEON::BI__builtin_neon_vst3q_v:
4300       case NEON::BI__builtin_neon_vst3_lane_v:
4301       case NEON::BI__builtin_neon_vst3q_lane_v:
4302       case NEON::BI__builtin_neon_vst4_v:
4303       case NEON::BI__builtin_neon_vst4q_v:
4304       case NEON::BI__builtin_neon_vst4_lane_v:
4305       case NEON::BI__builtin_neon_vst4q_lane_v:
4306         // Get the alignment for the argument in addition to the value;
4307         // we'll use it later.
4308         PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4309         Ops.push_back(PtrOp0.getPointer());
4310         continue;
4311       }
4312     }
4313     if (i == 1) {
4314       switch (BuiltinID) {
4315       case NEON::BI__builtin_neon_vld2_v:
4316       case NEON::BI__builtin_neon_vld2q_v:
4317       case NEON::BI__builtin_neon_vld3_v:
4318       case NEON::BI__builtin_neon_vld3q_v:
4319       case NEON::BI__builtin_neon_vld4_v:
4320       case NEON::BI__builtin_neon_vld4q_v:
4321       case NEON::BI__builtin_neon_vld2_lane_v:
4322       case NEON::BI__builtin_neon_vld2q_lane_v:
4323       case NEON::BI__builtin_neon_vld3_lane_v:
4324       case NEON::BI__builtin_neon_vld3q_lane_v:
4325       case NEON::BI__builtin_neon_vld4_lane_v:
4326       case NEON::BI__builtin_neon_vld4q_lane_v:
4327       case NEON::BI__builtin_neon_vld2_dup_v:
4328       case NEON::BI__builtin_neon_vld3_dup_v:
4329       case NEON::BI__builtin_neon_vld4_dup_v:
4330         // Get the alignment for the argument in addition to the value;
4331         // we'll use it later.
4332         PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4333         Ops.push_back(PtrOp1.getPointer());
4334         continue;
4335       }
4336     }
4337
         // Bit i of ICEArguments is tested to decide whether argument i is
         // emitted as a scalar expression or folded to a constant.
4338     if ((ICEArguments & (1 << i)) == 0) {
4339       Ops.push_back(EmitScalarExpr(E->getArg(i)));
4340     } else {
4341       // If this is required to be a constant, constant fold it so that we know
4342       // that the generated intrinsic gets a ConstantInt.
4343       llvm::APSInt Result;
4344       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4345       assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4346       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4347     }
4348   }
4349
       // Builtins handled directly from Ops, before the trailing type argument
       // is examined.
4350   switch (BuiltinID) {
4351   default: break;
4352
4353   case NEON::BI__builtin_neon_vget_lane_i8:
4354   case NEON::BI__builtin_neon_vget_lane_i16:
4355   case NEON::BI__builtin_neon_vget_lane_i32:
4356   case NEON::BI__builtin_neon_vget_lane_i64:
4357   case NEON::BI__builtin_neon_vget_lane_f32:
4358   case NEON::BI__builtin_neon_vgetq_lane_i8:
4359   case NEON::BI__builtin_neon_vgetq_lane_i16:
4360   case NEON::BI__builtin_neon_vgetq_lane_i32:
4361   case NEON::BI__builtin_neon_vgetq_lane_i64:
4362   case NEON::BI__builtin_neon_vgetq_lane_f32:
4363     return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4364
4365   case NEON::BI__builtin_neon_vset_lane_i8:
4366   case NEON::BI__builtin_neon_vset_lane_i16:
4367   case NEON::BI__builtin_neon_vset_lane_i32:
4368   case NEON::BI__builtin_neon_vset_lane_i64:
4369   case NEON::BI__builtin_neon_vset_lane_f32:
4370   case NEON::BI__builtin_neon_vsetq_lane_i8:
4371   case NEON::BI__builtin_neon_vsetq_lane_i16:
4372   case NEON::BI__builtin_neon_vsetq_lane_i32:
4373   case NEON::BI__builtin_neon_vsetq_lane_i64:
4374   case NEON::BI__builtin_neon_vsetq_lane_f32:
         // Note the operand order: Ops[1] is passed first, then Ops[0], then
         // Ops[2].
4375     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4376
4377   case NEON::BI__builtin_neon_vsha1h_u32:
4378     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4379                         "vsha1h");
4380   case NEON::BI__builtin_neon_vsha1cq_u32:
4381     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4382                         "vsha1h");
4383   case NEON::BI__builtin_neon_vsha1pq_u32:
4384     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4385                         "vsha1h");
4386   case NEON::BI__builtin_neon_vsha1mq_u32:
4387     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4388                         "vsha1h");
4389
4390   // The ARM _MoveToCoprocessor builtins put the input register value as
4391   // the first argument, but the LLVM intrinsic expects it as the third one.
4392   case ARM::BI_MoveToCoprocessor:
4393   case ARM::BI_MoveToCoprocessor2: {
4394     Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4395                                    Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4396     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4397                                   Ops[3], Ops[4], Ops[5]});
4398   }
4399   }
4400
4401   // Get the last argument, which specifies the vector type.
4402   assert(HasExtraArg);
4403   llvm::APSInt Result;
4404   const Expr *Arg = E->getArg(E->getNumArgs()-1);
4405   if (!Arg->isIntegerConstantExpr(Result, getContext()))
4406     return nullptr;
4407
       // For the vcvtr builtins the trailing constant is compared against 1 to
       // choose the unsigned intrinsic; it is not decoded as a NEON type.
4408   if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4409       BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4410     // Determine the overloaded type of this builtin.
4411     llvm::Type *Ty;
4412     if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4413       Ty = FloatTy;
4414     else
4415       Ty = DoubleTy;
4416
4417     // Determine whether this is an unsigned conversion or not.
4418     bool usgn = Result.getZExtValue() == 1;
4419     unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4420
4421     // Call the appropriate intrinsic.
4422     Function *F = CGM.getIntrinsic(Int, Ty);
4423     return Builder.CreateCall(F, Ops, "vcvtr");
4424   }
4425
4426   // Determine the type of this overloaded NEON intrinsic.
4427   NeonTypeFlags Type(Result.getZExtValue());
4428   bool usgn = Type.isUnsigned();
4429   bool rightShift = false;
4430
4431   llvm::VectorType *VTy = GetNeonType(this, Type);
4432   llvm::Type *Ty = VTy;
4433   if (!Ty)
4434     return nullptr;
4435
4436   // Many NEON builtins have identical semantics and uses in ARM and
4437   // AArch64. Emit these in a single function.
4438   auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4439   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4440       IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4441   if (Builtin)
4442     return EmitCommonNeonBuiltinExpr(
4443         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4444         Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4445
       // Builtins not found in the map above are handled case by case;
       // anything unlisted yields nullptr.
4446   unsigned Int;
4447   switch (BuiltinID) {
4448   default: return nullptr;
4449   case NEON::BI__builtin_neon_vld1q_lane_v:
4450     // Handle 64-bit integer elements as a special case.  Use shuffles of
4451     // one-element vectors to avoid poor code for i64 in the backend.
4452     if (VTy->getElementType()->isIntegerTy(64)) {
4453       // Extract the other lane.
4454       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4455       uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4456       Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4457       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4458       // Load the value as a one-element vector.
4459       Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4460       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4461       Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4462       Value *Align = getAlignmentValue32(PtrOp0);
4463       Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4464       // Combine them.
4465       uint32_t Indices[] = {1 - Lane, Lane};
4466       SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4467       return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4468     }
4469     // fall through
4470   case NEON::BI__builtin_neon_vld1_lane_v: {
           // Load one element through PtrOp0 and insert it into the vector in
           // Ops[1] at the lane given by Ops[2].
4471     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4472     PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4473     Value *Ld = Builder.CreateLoad(PtrOp0);
4474     return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4475   }
4476   case NEON::BI__builtin_neon_vld2_dup_v:
4477   case NEON::BI__builtin_neon_vld3_dup_v:
4478   case NEON::BI__builtin_neon_vld4_dup_v: {
4479     // Handle 64-bit elements as a special-case.  There is no "dup" needed.
4480     if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4481       switch (BuiltinID) {
4482       case NEON::BI__builtin_neon_vld2_dup_v:
4483         Int = Intrinsic::arm_neon_vld2;
4484         break;
4485       case NEON::BI__builtin_neon_vld3_dup_v:
4486         Int = Intrinsic::arm_neon_vld3;
4487         break;
4488       case NEON::BI__builtin_neon_vld4_dup_v:
4489         Int = Intrinsic::arm_neon_vld4;
4490         break;
4491       default: llvm_unreachable("unknown vld_dup intrinsic?");
4492       }
4493       llvm::Type *Tys[] = {Ty, Int8PtrTy};
4494       Function *F = CGM.getIntrinsic(Int, Tys);
4495       llvm::Value *Align = getAlignmentValue32(PtrOp1);
4496       Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
           // Store the loaded aggregate through Ops[0], cast to a pointer to
           // the aggregate's type.
4497       Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4498       Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4499       return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4500     }
4501     switch (BuiltinID) {
4502     case NEON::BI__builtin_neon_vld2_dup_v:
4503       Int = Intrinsic::arm_neon_vld2lane;
4504       break;
4505     case NEON::BI__builtin_neon_vld3_dup_v:
4506       Int = Intrinsic::arm_neon_vld3lane;
4507       break;
4508     case NEON::BI__builtin_neon_vld4_dup_v:
4509       Int = Intrinsic::arm_neon_vld4lane;
4510       break;
4511     default: llvm_unreachable("unknown vld_dup intrinsic?");
4512     }
4513     llvm::Type *Tys[] = {Ty, Int8PtrTy};
4514     Function *F = CGM.getIntrinsic(Int, Tys);
4515     llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
4516
         // Call arguments: the pointer, one undef vector per struct element,
         // lane index 0, and the alignment.
4517     SmallVector<Value*, 6> Args;
4518     Args.push_back(Ops[1]);
4519     Args.append(STy->getNumElements(), UndefValue::get(Ty));
4520
4521     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
4522     Args.push_back(CI);
4523     Args.push_back(getAlignmentValue32(PtrOp1));
4524
4525     Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
4526     // splat lane 0 to all elts in each vector of the result.
4527     for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4528       Value *Val = Builder.CreateExtractValue(Ops[1], i);
4529       Value *Elt = Builder.CreateBitCast(Val, Ty);
4530       Elt = EmitNeonSplat(Elt, CI);
4531       Elt = Builder.CreateBitCast(Elt, Val->getType());
4532       Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
4533     }
4534     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4535     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4536     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4537   }
4538   case NEON::BI__builtin_neon_vqrshrn_n_v:
4539     Int =
4540       usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
4541     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
4542                         1, true);
4543   case NEON::BI__builtin_neon_vqrshrun_n_v:
4544     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
4545                         Ops, "vqrshrun_n", 1, true);
4546   case NEON::BI__builtin_neon_vqshrn_n_v:
4547     Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
4548     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
4549                         1, true);
4550   case NEON::BI__builtin_neon_vqshrun_n_v:
4551     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
4552                         Ops, "vqshrun_n", 1, true);
4553   case NEON::BI__builtin_neon_vrecpe_v:
4554   case NEON::BI__builtin_neon_vrecpeq_v:
4555     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
4556                         Ops, "vrecpe");
4557   case NEON::BI__builtin_neon_vrshrn_n_v:
4558     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
4559                         Ops, "vrshrn_n", 1, true);
4560   case NEON::BI__builtin_neon_vrsra_n_v:
4561   case NEON::BI__builtin_neon_vrsraq_n_v:
         // Result is Ops[0] plus the rounding-shift intrinsic applied to
         // Ops[1] with the shift vector built from Ops[2].
4562     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4563     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4564     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
4565     Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
4566     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
4567     return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
4568   case NEON::BI__builtin_neon_vsri_n_v:
4569   case NEON::BI__builtin_neon_vsriq_n_v:
4570     rightShift = true;
         // fall through: vsri shares the vsli emission below, differing only
         // in the rightShift flag.
4571   case NEON::BI__builtin_neon_vsli_n_v:
4572   case NEON::BI__builtin_neon_vsliq_n_v:
4573     Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
4574     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
4575                         Ops, "vsli_n");
4576   case NEON::BI__builtin_neon_vsra_n_v:
4577   case NEON::BI__builtin_neon_vsraq_n_v:
4578     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4579     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
4580     return Builder.CreateAdd(Ops[0], Ops[1]);
4581   case NEON::BI__builtin_neon_vst1q_lane_v:
4582     // Handle 64-bit integer elements as a special case.  Use a shuffle to get
4583     // a one-element vector and avoid poor code for i64 in the backend.
4584     if (VTy->getElementType()->isIntegerTy(64)) {
4585       Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4586       Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
4587       Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
           // Ops[2] (the lane) is replaced by the alignment operand before
           // Ops is passed to the store intrinsic.
4588       Ops[2] = getAlignmentValue32(PtrOp0);
4589       llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
4590       return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
4591                                                  Tys), Ops);
4592     }
4593     // fall through
4594   case NEON::BI__builtin_neon_vst1_lane_v: {
         // Extract the selected lane from Ops[1] and store it through PtrOp0.
4595     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4596     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
4597     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
4598     auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
4599     return St;
4600   }
4601   case NEON::BI__builtin_neon_vtbl1_v:
4602     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
4603                         Ops, "vtbl1");
4604   case NEON::BI__builtin_neon_vtbl2_v:
4605     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
4606                         Ops, "vtbl2");
4607   case NEON::BI__builtin_neon_vtbl3_v:
4608     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
4609                         Ops, "vtbl3");
4610   case NEON::BI__builtin_neon_vtbl4_v:
4611     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
4612                         Ops, "vtbl4");
4613   case NEON::BI__builtin_neon_vtbx1_v:
4614     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
4615                         Ops, "vtbx1");
4616   case NEON::BI__builtin_neon_vtbx2_v:
4617     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
4618                         Ops, "vtbx2");
4619   case NEON::BI__builtin_neon_vtbx3_v:
4620     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
4621                         Ops, "vtbx3");
4622   case NEON::BI__builtin_neon_vtbx4_v:
4623     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
4624                         Ops, "vtbx4");
4625   }
4626 }
4627 
EmitAArch64TblBuiltinExpr(CodeGenFunction & CGF,unsigned BuiltinID,const CallExpr * E,SmallVectorImpl<Value * > & Ops)4628 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
4629                                       const CallExpr *E,
4630                                       SmallVectorImpl<Value *> &Ops) {
4631   unsigned int Int = 0;
4632   const char *s = nullptr;
4633 
4634   switch (BuiltinID) {
4635   default:
4636     return nullptr;
4637   case NEON::BI__builtin_neon_vtbl1_v:
4638   case NEON::BI__builtin_neon_vqtbl1_v:
4639   case NEON::BI__builtin_neon_vqtbl1q_v:
4640   case NEON::BI__builtin_neon_vtbl2_v:
4641   case NEON::BI__builtin_neon_vqtbl2_v:
4642   case NEON::BI__builtin_neon_vqtbl2q_v:
4643   case NEON::BI__builtin_neon_vtbl3_v:
4644   case NEON::BI__builtin_neon_vqtbl3_v:
4645   case NEON::BI__builtin_neon_vqtbl3q_v:
4646   case NEON::BI__builtin_neon_vtbl4_v:
4647   case NEON::BI__builtin_neon_vqtbl4_v:
4648   case NEON::BI__builtin_neon_vqtbl4q_v:
4649     break;
4650   case NEON::BI__builtin_neon_vtbx1_v:
4651   case NEON::BI__builtin_neon_vqtbx1_v:
4652   case NEON::BI__builtin_neon_vqtbx1q_v:
4653   case NEON::BI__builtin_neon_vtbx2_v:
4654   case NEON::BI__builtin_neon_vqtbx2_v:
4655   case NEON::BI__builtin_neon_vqtbx2q_v:
4656   case NEON::BI__builtin_neon_vtbx3_v:
4657   case NEON::BI__builtin_neon_vqtbx3_v:
4658   case NEON::BI__builtin_neon_vqtbx3q_v:
4659   case NEON::BI__builtin_neon_vtbx4_v:
4660   case NEON::BI__builtin_neon_vqtbx4_v:
4661   case NEON::BI__builtin_neon_vqtbx4q_v:
4662     break;
4663   }
4664 
4665   assert(E->getNumArgs() >= 3);
4666 
4667   // Get the last argument, which specifies the vector type.
4668   llvm::APSInt Result;
4669   const Expr *Arg = E->getArg(E->getNumArgs() - 1);
4670   if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
4671     return nullptr;
4672 
4673   // Determine the type of this overloaded NEON intrinsic.
4674   NeonTypeFlags Type(Result.getZExtValue());
4675   llvm::VectorType *Ty = GetNeonType(&CGF, Type);
4676   if (!Ty)
4677     return nullptr;
4678 
4679   CodeGen::CGBuilderTy &Builder = CGF.Builder;
4680 
4681   // AArch64 scalar builtins are not overloaded, they do not have an extra
4682   // argument that specifies the vector type, need to handle each case.
4683   switch (BuiltinID) {
4684   case NEON::BI__builtin_neon_vtbl1_v: {
4685     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
4686                               Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
4687                               "vtbl1");
4688   }
4689   case NEON::BI__builtin_neon_vtbl2_v: {
4690     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
4691                               Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
4692                               "vtbl1");
4693   }
4694   case NEON::BI__builtin_neon_vtbl3_v: {
4695     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
4696                               Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
4697                               "vtbl2");
4698   }
4699   case NEON::BI__builtin_neon_vtbl4_v: {
4700     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
4701                               Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
4702                               "vtbl2");
4703   }
4704   case NEON::BI__builtin_neon_vtbx1_v: {
4705     Value *TblRes =
4706         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
4707                            Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
4708 
4709     llvm::Constant *EightV = ConstantInt::get(Ty, 8);
4710     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
4711     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4712 
4713     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4714     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4715     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4716   }
4717   case NEON::BI__builtin_neon_vtbx2_v: {
4718     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
4719                               Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
4720                               "vtbx1");
4721   }
4722   case NEON::BI__builtin_neon_vtbx3_v: {
4723     Value *TblRes =
4724         packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
4725                            Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
4726 
4727     llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
4728     Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
4729                                            TwentyFourV);
4730     CmpRes = Builder.CreateSExt(CmpRes, Ty);
4731 
4732     Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
4733     Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
4734     return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
4735   }
4736   case NEON::BI__builtin_neon_vtbx4_v: {
4737     return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
4738                               Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
4739                               "vtbx2");
4740   }
4741   case NEON::BI__builtin_neon_vqtbl1_v:
4742   case NEON::BI__builtin_neon_vqtbl1q_v:
4743     Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
4744   case NEON::BI__builtin_neon_vqtbl2_v:
4745   case NEON::BI__builtin_neon_vqtbl2q_v: {
4746     Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
4747   case NEON::BI__builtin_neon_vqtbl3_v:
4748   case NEON::BI__builtin_neon_vqtbl3q_v:
4749     Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
4750   case NEON::BI__builtin_neon_vqtbl4_v:
4751   case NEON::BI__builtin_neon_vqtbl4q_v:
4752     Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
4753   case NEON::BI__builtin_neon_vqtbx1_v:
4754   case NEON::BI__builtin_neon_vqtbx1q_v:
4755     Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
4756   case NEON::BI__builtin_neon_vqtbx2_v:
4757   case NEON::BI__builtin_neon_vqtbx2q_v:
4758     Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
4759   case NEON::BI__builtin_neon_vqtbx3_v:
4760   case NEON::BI__builtin_neon_vqtbx3q_v:
4761     Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
4762   case NEON::BI__builtin_neon_vqtbx4_v:
4763   case NEON::BI__builtin_neon_vqtbx4q_v:
4764     Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
4765   }
4766   }
4767 
4768   if (!Int)
4769     return nullptr;
4770 
4771   Function *F = CGF.CGM.getIntrinsic(Int, Ty);
4772   return CGF.EmitNeonCall(F, Ops, s);
4773 }
4774 
vectorWrapScalar16(Value * Op)4775 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
4776   llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
4777   Op = Builder.CreateBitCast(Op, Int16Ty);
4778   Value *V = UndefValue::get(VTy);
4779   llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
4780   Op = Builder.CreateInsertElement(V, Op, CI);
4781   return Op;
4782 }
4783 
EmitAArch64BuiltinExpr(unsigned BuiltinID,const CallExpr * E)4784 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
4785                                                const CallExpr *E) {
4786   unsigned HintID = static_cast<unsigned>(-1);
4787   switch (BuiltinID) {
4788   default: break;
4789   case AArch64::BI__builtin_arm_nop:
4790     HintID = 0;
4791     break;
4792   case AArch64::BI__builtin_arm_yield:
4793     HintID = 1;
4794     break;
4795   case AArch64::BI__builtin_arm_wfe:
4796     HintID = 2;
4797     break;
4798   case AArch64::BI__builtin_arm_wfi:
4799     HintID = 3;
4800     break;
4801   case AArch64::BI__builtin_arm_sev:
4802     HintID = 4;
4803     break;
4804   case AArch64::BI__builtin_arm_sevl:
4805     HintID = 5;
4806     break;
4807   }
4808 
4809   if (HintID != static_cast<unsigned>(-1)) {
4810     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
4811     return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
4812   }
4813 
4814   if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
4815     Value *Address         = EmitScalarExpr(E->getArg(0));
4816     Value *RW              = EmitScalarExpr(E->getArg(1));
4817     Value *CacheLevel      = EmitScalarExpr(E->getArg(2));
4818     Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
4819     Value *IsData          = EmitScalarExpr(E->getArg(4));
4820 
4821     Value *Locality = nullptr;
4822     if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
4823       // Temporal fetch, needs to convert cache level to locality.
4824       Locality = llvm::ConstantInt::get(Int32Ty,
4825         -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
4826     } else {
4827       // Streaming fetch.
4828       Locality = llvm::ConstantInt::get(Int32Ty, 0);
4829     }
4830 
4831     // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
4832     // PLDL3STRM or PLDL2STRM.
4833     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4834     return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4835   }
4836 
4837   if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
4838     assert((getContext().getTypeSize(E->getType()) == 32) &&
4839            "rbit of unusual size!");
4840     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4841     return Builder.CreateCall(
4842         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4843   }
4844   if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
4845     assert((getContext().getTypeSize(E->getType()) == 64) &&
4846            "rbit of unusual size!");
4847     llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4848     return Builder.CreateCall(
4849         CGM.getIntrinsic(Intrinsic::aarch64_rbit, Arg->getType()), Arg, "rbit");
4850   }
4851 
4852   if (BuiltinID == AArch64::BI__clear_cache) {
4853     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4854     const FunctionDecl *FD = E->getDirectCallee();
4855     Value *Ops[2];
4856     for (unsigned i = 0; i < 2; i++)
4857       Ops[i] = EmitScalarExpr(E->getArg(i));
4858     llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4859     llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4860     StringRef Name = FD->getName();
4861     return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4862   }
4863 
4864   if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4865       BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
4866       getContext().getTypeSize(E->getType()) == 128) {
4867     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4868                                        ? Intrinsic::aarch64_ldaxp
4869                                        : Intrinsic::aarch64_ldxp);
4870 
4871     Value *LdPtr = EmitScalarExpr(E->getArg(0));
4872     Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4873                                     "ldxp");
4874 
4875     Value *Val0 = Builder.CreateExtractValue(Val, 1);
4876     Value *Val1 = Builder.CreateExtractValue(Val, 0);
4877     llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
4878     Val0 = Builder.CreateZExt(Val0, Int128Ty);
4879     Val1 = Builder.CreateZExt(Val1, Int128Ty);
4880 
4881     Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
4882     Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4883     Val = Builder.CreateOr(Val, Val1);
4884     return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4885   } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
4886              BuiltinID == AArch64::BI__builtin_arm_ldaex) {
4887     Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4888 
4889     QualType Ty = E->getType();
4890     llvm::Type *RealResTy = ConvertType(Ty);
4891     llvm::Type *IntResTy = llvm::IntegerType::get(getLLVMContext(),
4892                                                   getContext().getTypeSize(Ty));
4893     LoadAddr = Builder.CreateBitCast(LoadAddr, IntResTy->getPointerTo());
4894 
4895     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
4896                                        ? Intrinsic::aarch64_ldaxr
4897                                        : Intrinsic::aarch64_ldxr,
4898                                    LoadAddr->getType());
4899     Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
4900 
4901     if (RealResTy->isPointerTy())
4902       return Builder.CreateIntToPtr(Val, RealResTy);
4903 
4904     Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4905     return Builder.CreateBitCast(Val, RealResTy);
4906   }
4907 
4908   if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
4909        BuiltinID == AArch64::BI__builtin_arm_stlex) &&
4910       getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
4911     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4912                                        ? Intrinsic::aarch64_stlxp
4913                                        : Intrinsic::aarch64_stxp);
4914     llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty, nullptr);
4915 
4916     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4917     EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
4918 
4919     Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
4920     llvm::Value *Val = Builder.CreateLoad(Tmp);
4921 
4922     Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4923     Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4924     Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
4925                                          Int8PtrTy);
4926     return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
4927   }
4928 
4929   if (BuiltinID == AArch64::BI__builtin_arm_strex ||
4930       BuiltinID == AArch64::BI__builtin_arm_stlex) {
4931     Value *StoreVal = EmitScalarExpr(E->getArg(0));
4932     Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4933 
4934     QualType Ty = E->getArg(0)->getType();
4935     llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4936                                                  getContext().getTypeSize(Ty));
4937     StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4938 
4939     if (StoreVal->getType()->isPointerTy())
4940       StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
4941     else {
4942       StoreVal = Builder.CreateBitCast(StoreVal, StoreTy);
4943       StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
4944     }
4945 
4946     Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
4947                                        ? Intrinsic::aarch64_stlxr
4948                                        : Intrinsic::aarch64_stxr,
4949                                    StoreAddr->getType());
4950     return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
4951   }
4952 
4953   if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
4954     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
4955     return Builder.CreateCall(F);
4956   }
4957 
4958   // CRC32
4959   Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4960   switch (BuiltinID) {
4961   case AArch64::BI__builtin_arm_crc32b:
4962     CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
4963   case AArch64::BI__builtin_arm_crc32cb:
4964     CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
4965   case AArch64::BI__builtin_arm_crc32h:
4966     CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
4967   case AArch64::BI__builtin_arm_crc32ch:
4968     CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
4969   case AArch64::BI__builtin_arm_crc32w:
4970     CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
4971   case AArch64::BI__builtin_arm_crc32cw:
4972     CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
4973   case AArch64::BI__builtin_arm_crc32d:
4974     CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
4975   case AArch64::BI__builtin_arm_crc32cd:
4976     CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
4977   }
4978 
4979   if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4980     Value *Arg0 = EmitScalarExpr(E->getArg(0));
4981     Value *Arg1 = EmitScalarExpr(E->getArg(1));
4982     Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4983 
4984     llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
4985     Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
4986 
4987     return Builder.CreateCall(F, {Arg0, Arg1});
4988   }
4989 
4990   if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
4991       BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4992       BuiltinID == AArch64::BI__builtin_arm_rsrp ||
4993       BuiltinID == AArch64::BI__builtin_arm_wsr ||
4994       BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
4995       BuiltinID == AArch64::BI__builtin_arm_wsrp) {
4996 
4997     bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
4998                   BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
4999                   BuiltinID == AArch64::BI__builtin_arm_rsrp;
5000 
5001     bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5002                             BuiltinID == AArch64::BI__builtin_arm_wsrp;
5003 
5004     bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5005                    BuiltinID != AArch64::BI__builtin_arm_wsr;
5006 
5007     llvm::Type *ValueType;
5008     llvm::Type *RegisterType = Int64Ty;
5009     if (IsPointerBuiltin) {
5010       ValueType = VoidPtrTy;
5011     } else if (Is64Bit) {
5012       ValueType = Int64Ty;
5013     } else {
5014       ValueType = Int32Ty;
5015     }
5016 
5017     return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5018   }
5019 
5020   // Find out if any arguments are required to be integer constant
5021   // expressions.
5022   unsigned ICEArguments = 0;
5023   ASTContext::GetBuiltinTypeError Error;
5024   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5025   assert(Error == ASTContext::GE_None && "Should not codegen an error");
5026 
5027   llvm::SmallVector<Value*, 4> Ops;
5028   for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5029     if ((ICEArguments & (1 << i)) == 0) {
5030       Ops.push_back(EmitScalarExpr(E->getArg(i)));
5031     } else {
5032       // If this is required to be a constant, constant fold it so that we know
5033       // that the generated intrinsic gets a ConstantInt.
5034       llvm::APSInt Result;
5035       bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5036       assert(IsConst && "Constant arg isn't actually constant?");
5037       (void)IsConst;
5038       Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5039     }
5040   }
5041 
5042   auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5043   const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5044       SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5045 
5046   if (Builtin) {
5047     Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5048     Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5049     assert(Result && "SISD intrinsic should have been handled");
5050     return Result;
5051   }
5052 
5053   llvm::APSInt Result;
5054   const Expr *Arg = E->getArg(E->getNumArgs()-1);
5055   NeonTypeFlags Type(0);
5056   if (Arg->isIntegerConstantExpr(Result, getContext()))
5057     // Determine the type of this overloaded NEON intrinsic.
5058     Type = NeonTypeFlags(Result.getZExtValue());
5059 
5060   bool usgn = Type.isUnsigned();
5061   bool quad = Type.isQuad();
5062 
5063   // Handle non-overloaded intrinsics first.
5064   switch (BuiltinID) {
5065   default: break;
5066   case NEON::BI__builtin_neon_vldrq_p128: {
5067     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5068     Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5069     return Builder.CreateDefaultAlignedLoad(Ptr);
5070   }
5071   case NEON::BI__builtin_neon_vstrq_p128: {
5072     llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5073     Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5074     return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5075   }
5076   case NEON::BI__builtin_neon_vcvts_u32_f32:
5077   case NEON::BI__builtin_neon_vcvtd_u64_f64:
5078     usgn = true;
5079     // FALL THROUGH
5080   case NEON::BI__builtin_neon_vcvts_s32_f32:
5081   case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5082     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5083     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5084     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5085     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5086     Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5087     if (usgn)
5088       return Builder.CreateFPToUI(Ops[0], InTy);
5089     return Builder.CreateFPToSI(Ops[0], InTy);
5090   }
5091   case NEON::BI__builtin_neon_vcvts_f32_u32:
5092   case NEON::BI__builtin_neon_vcvtd_f64_u64:
5093     usgn = true;
5094     // FALL THROUGH
5095   case NEON::BI__builtin_neon_vcvts_f32_s32:
5096   case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5097     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5098     bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5099     llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5100     llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5101     Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5102     if (usgn)
5103       return Builder.CreateUIToFP(Ops[0], FTy);
5104     return Builder.CreateSIToFP(Ops[0], FTy);
5105   }
5106   case NEON::BI__builtin_neon_vpaddd_s64: {
5107     llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5108     Value *Vec = EmitScalarExpr(E->getArg(0));
5109     // The vector is v2f64, so make sure it's bitcast to that.
5110     Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5111     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5112     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5113     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5114     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5115     // Pairwise addition of a v2f64 into a scalar f64.
5116     return Builder.CreateAdd(Op0, Op1, "vpaddd");
5117   }
5118   case NEON::BI__builtin_neon_vpaddd_f64: {
5119     llvm::Type *Ty =
5120       llvm::VectorType::get(DoubleTy, 2);
5121     Value *Vec = EmitScalarExpr(E->getArg(0));
5122     // The vector is v2f64, so make sure it's bitcast to that.
5123     Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5124     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5125     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5126     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5127     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5128     // Pairwise addition of a v2f64 into a scalar f64.
5129     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5130   }
5131   case NEON::BI__builtin_neon_vpadds_f32: {
5132     llvm::Type *Ty =
5133       llvm::VectorType::get(FloatTy, 2);
5134     Value *Vec = EmitScalarExpr(E->getArg(0));
5135     // The vector is v2f32, so make sure it's bitcast to that.
5136     Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5137     llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5138     llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5139     Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5140     Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5141     // Pairwise addition of a v2f32 into a scalar f32.
5142     return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5143   }
5144   case NEON::BI__builtin_neon_vceqzd_s64:
5145   case NEON::BI__builtin_neon_vceqzd_f64:
5146   case NEON::BI__builtin_neon_vceqzs_f32:
5147     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5148     return EmitAArch64CompareBuiltinExpr(
5149         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5150         ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5151   case NEON::BI__builtin_neon_vcgezd_s64:
5152   case NEON::BI__builtin_neon_vcgezd_f64:
5153   case NEON::BI__builtin_neon_vcgezs_f32:
5154     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5155     return EmitAArch64CompareBuiltinExpr(
5156         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5157         ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5158   case NEON::BI__builtin_neon_vclezd_s64:
5159   case NEON::BI__builtin_neon_vclezd_f64:
5160   case NEON::BI__builtin_neon_vclezs_f32:
5161     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5162     return EmitAArch64CompareBuiltinExpr(
5163         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5164         ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5165   case NEON::BI__builtin_neon_vcgtzd_s64:
5166   case NEON::BI__builtin_neon_vcgtzd_f64:
5167   case NEON::BI__builtin_neon_vcgtzs_f32:
5168     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5169     return EmitAArch64CompareBuiltinExpr(
5170         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5171         ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5172   case NEON::BI__builtin_neon_vcltzd_s64:
5173   case NEON::BI__builtin_neon_vcltzd_f64:
5174   case NEON::BI__builtin_neon_vcltzs_f32:
5175     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5176     return EmitAArch64CompareBuiltinExpr(
5177         Ops[0], ConvertType(E->getCallReturnType(getContext())),
5178         ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5179 
5180   case NEON::BI__builtin_neon_vceqzd_u64: {
5181     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5182     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5183     Ops[0] =
5184         Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5185     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5186   }
5187   case NEON::BI__builtin_neon_vceqd_f64:
5188   case NEON::BI__builtin_neon_vcled_f64:
5189   case NEON::BI__builtin_neon_vcltd_f64:
5190   case NEON::BI__builtin_neon_vcged_f64:
5191   case NEON::BI__builtin_neon_vcgtd_f64: {
5192     llvm::CmpInst::Predicate P;
5193     switch (BuiltinID) {
5194     default: llvm_unreachable("missing builtin ID in switch!");
5195     case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5196     case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5197     case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5198     case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5199     case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5200     }
5201     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5202     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5203     Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5204     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5205     return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5206   }
5207   case NEON::BI__builtin_neon_vceqs_f32:
5208   case NEON::BI__builtin_neon_vcles_f32:
5209   case NEON::BI__builtin_neon_vclts_f32:
5210   case NEON::BI__builtin_neon_vcges_f32:
5211   case NEON::BI__builtin_neon_vcgts_f32: {
5212     llvm::CmpInst::Predicate P;
5213     switch (BuiltinID) {
5214     default: llvm_unreachable("missing builtin ID in switch!");
5215     case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5216     case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5217     case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5218     case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5219     case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5220     }
5221     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5222     Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5223     Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5224     Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5225     return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5226   }
5227   case NEON::BI__builtin_neon_vceqd_s64:
5228   case NEON::BI__builtin_neon_vceqd_u64:
5229   case NEON::BI__builtin_neon_vcgtd_s64:
5230   case NEON::BI__builtin_neon_vcgtd_u64:
5231   case NEON::BI__builtin_neon_vcltd_s64:
5232   case NEON::BI__builtin_neon_vcltd_u64:
5233   case NEON::BI__builtin_neon_vcged_u64:
5234   case NEON::BI__builtin_neon_vcged_s64:
5235   case NEON::BI__builtin_neon_vcled_u64:
5236   case NEON::BI__builtin_neon_vcled_s64: {
5237     llvm::CmpInst::Predicate P;
5238     switch (BuiltinID) {
5239     default: llvm_unreachable("missing builtin ID in switch!");
5240     case NEON::BI__builtin_neon_vceqd_s64:
5241     case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5242     case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5243     case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5244     case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5245     case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5246     case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5247     case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5248     case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5249     case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5250     }
5251     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5252     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5253     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5254     Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5255     return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5256   }
5257   case NEON::BI__builtin_neon_vtstd_s64:
5258   case NEON::BI__builtin_neon_vtstd_u64: {
5259     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5260     Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5261     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5262     Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5263     Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5264                                 llvm::Constant::getNullValue(Int64Ty));
5265     return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5266   }
5267   case NEON::BI__builtin_neon_vset_lane_i8:
5268   case NEON::BI__builtin_neon_vset_lane_i16:
5269   case NEON::BI__builtin_neon_vset_lane_i32:
5270   case NEON::BI__builtin_neon_vset_lane_i64:
5271   case NEON::BI__builtin_neon_vset_lane_f32:
5272   case NEON::BI__builtin_neon_vsetq_lane_i8:
5273   case NEON::BI__builtin_neon_vsetq_lane_i16:
5274   case NEON::BI__builtin_neon_vsetq_lane_i32:
5275   case NEON::BI__builtin_neon_vsetq_lane_i64:
5276   case NEON::BI__builtin_neon_vsetq_lane_f32:
5277     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5278     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5279   case NEON::BI__builtin_neon_vset_lane_f64:
5280     // The vector type needs a cast for the v1f64 variant.
5281     Ops[1] = Builder.CreateBitCast(Ops[1],
5282                                    llvm::VectorType::get(DoubleTy, 1));
5283     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5284     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5285   case NEON::BI__builtin_neon_vsetq_lane_f64:
5286     // The vector type needs a cast for the v2f64 variant.
5287     Ops[1] = Builder.CreateBitCast(Ops[1],
5288         llvm::VectorType::get(DoubleTy, 2));
5289     Ops.push_back(EmitScalarExpr(E->getArg(2)));
5290     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5291 
5292   case NEON::BI__builtin_neon_vget_lane_i8:
5293   case NEON::BI__builtin_neon_vdupb_lane_i8:
5294     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5295     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5296                                         "vget_lane");
5297   case NEON::BI__builtin_neon_vgetq_lane_i8:
5298   case NEON::BI__builtin_neon_vdupb_laneq_i8:
5299     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5300     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5301                                         "vgetq_lane");
5302   case NEON::BI__builtin_neon_vget_lane_i16:
5303   case NEON::BI__builtin_neon_vduph_lane_i16:
5304     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5305     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5306                                         "vget_lane");
5307   case NEON::BI__builtin_neon_vgetq_lane_i16:
5308   case NEON::BI__builtin_neon_vduph_laneq_i16:
5309     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5310     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5311                                         "vgetq_lane");
5312   case NEON::BI__builtin_neon_vget_lane_i32:
5313   case NEON::BI__builtin_neon_vdups_lane_i32:
5314     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5315     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5316                                         "vget_lane");
5317   case NEON::BI__builtin_neon_vdups_lane_f32:
5318     Ops[0] = Builder.CreateBitCast(Ops[0],
5319         llvm::VectorType::get(FloatTy, 2));
5320     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5321                                         "vdups_lane");
5322   case NEON::BI__builtin_neon_vgetq_lane_i32:
5323   case NEON::BI__builtin_neon_vdups_laneq_i32:
5324     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5325     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5326                                         "vgetq_lane");
5327   case NEON::BI__builtin_neon_vget_lane_i64:
5328   case NEON::BI__builtin_neon_vdupd_lane_i64:
5329     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5330     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5331                                         "vget_lane");
5332   case NEON::BI__builtin_neon_vdupd_lane_f64:
5333     Ops[0] = Builder.CreateBitCast(Ops[0],
5334         llvm::VectorType::get(DoubleTy, 1));
5335     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5336                                         "vdupd_lane");
5337   case NEON::BI__builtin_neon_vgetq_lane_i64:
5338   case NEON::BI__builtin_neon_vdupd_laneq_i64:
5339     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5340     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5341                                         "vgetq_lane");
5342   case NEON::BI__builtin_neon_vget_lane_f32:
5343     Ops[0] = Builder.CreateBitCast(Ops[0],
5344         llvm::VectorType::get(FloatTy, 2));
5345     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5346                                         "vget_lane");
5347   case NEON::BI__builtin_neon_vget_lane_f64:
5348     Ops[0] = Builder.CreateBitCast(Ops[0],
5349         llvm::VectorType::get(DoubleTy, 1));
5350     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5351                                         "vget_lane");
5352   case NEON::BI__builtin_neon_vgetq_lane_f32:
5353   case NEON::BI__builtin_neon_vdups_laneq_f32:
5354     Ops[0] = Builder.CreateBitCast(Ops[0],
5355         llvm::VectorType::get(FloatTy, 4));
5356     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5357                                         "vgetq_lane");
5358   case NEON::BI__builtin_neon_vgetq_lane_f64:
5359   case NEON::BI__builtin_neon_vdupd_laneq_f64:
5360     Ops[0] = Builder.CreateBitCast(Ops[0],
5361         llvm::VectorType::get(DoubleTy, 2));
5362     return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5363                                         "vgetq_lane");
5364   case NEON::BI__builtin_neon_vaddd_s64:
5365   case NEON::BI__builtin_neon_vaddd_u64:
5366     return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5367   case NEON::BI__builtin_neon_vsubd_s64:
5368   case NEON::BI__builtin_neon_vsubd_u64:
5369     return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5370   case NEON::BI__builtin_neon_vqdmlalh_s16:
5371   case NEON::BI__builtin_neon_vqdmlslh_s16: {
5372     SmallVector<Value *, 2> ProductOps;
5373     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5374     ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5375     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5376     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5377                           ProductOps, "vqdmlXl");
5378     Constant *CI = ConstantInt::get(SizeTy, 0);
5379     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5380 
5381     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5382                                         ? Intrinsic::aarch64_neon_sqadd
5383                                         : Intrinsic::aarch64_neon_sqsub;
5384     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5385   }
5386   case NEON::BI__builtin_neon_vqshlud_n_s64: {
5387     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5388     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5389     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5390                         Ops, "vqshlu_n");
5391   }
5392   case NEON::BI__builtin_neon_vqshld_n_u64:
5393   case NEON::BI__builtin_neon_vqshld_n_s64: {
5394     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5395                                    ? Intrinsic::aarch64_neon_uqshl
5396                                    : Intrinsic::aarch64_neon_sqshl;
5397     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5398     Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5399     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5400   }
5401   case NEON::BI__builtin_neon_vrshrd_n_u64:
5402   case NEON::BI__builtin_neon_vrshrd_n_s64: {
5403     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5404                                    ? Intrinsic::aarch64_neon_urshl
5405                                    : Intrinsic::aarch64_neon_srshl;
5406     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5407     int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5408     Ops[1] = ConstantInt::get(Int64Ty, -SV);
5409     return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5410   }
5411   case NEON::BI__builtin_neon_vrsrad_n_u64:
5412   case NEON::BI__builtin_neon_vrsrad_n_s64: {
5413     unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5414                                    ? Intrinsic::aarch64_neon_urshl
5415                                    : Intrinsic::aarch64_neon_srshl;
5416     Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5417     Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5418     Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5419                                 {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5420     return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5421   }
5422   case NEON::BI__builtin_neon_vshld_n_s64:
5423   case NEON::BI__builtin_neon_vshld_n_u64: {
5424     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5425     return Builder.CreateShl(
5426         Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5427   }
5428   case NEON::BI__builtin_neon_vshrd_n_s64: {
5429     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5430     return Builder.CreateAShr(
5431         Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5432                                                    Amt->getZExtValue())),
5433         "shrd_n");
5434   }
5435   case NEON::BI__builtin_neon_vshrd_n_u64: {
5436     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5437     uint64_t ShiftAmt = Amt->getZExtValue();
5438     // Right-shifting an unsigned value by its size yields 0.
5439     if (ShiftAmt == 64)
5440       return ConstantInt::get(Int64Ty, 0);
5441     return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5442                               "shrd_n");
5443   }
5444   case NEON::BI__builtin_neon_vsrad_n_s64: {
5445     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5446     Ops[1] = Builder.CreateAShr(
5447         Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5448                                                    Amt->getZExtValue())),
5449         "shrd_n");
5450     return Builder.CreateAdd(Ops[0], Ops[1]);
5451   }
5452   case NEON::BI__builtin_neon_vsrad_n_u64: {
5453     llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5454     uint64_t ShiftAmt = Amt->getZExtValue();
5455     // Right-shifting an unsigned value by its size yields 0.
5456     // As Op + 0 = Op, return Ops[0] directly.
5457     if (ShiftAmt == 64)
5458       return Ops[0];
5459     Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5460                                 "shrd_n");
5461     return Builder.CreateAdd(Ops[0], Ops[1]);
5462   }
5463   case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5464   case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5465   case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5466   case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5467     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5468                                           "lane");
5469     SmallVector<Value *, 2> ProductOps;
5470     ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5471     ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5472     llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5473     Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5474                           ProductOps, "vqdmlXl");
5475     Constant *CI = ConstantInt::get(SizeTy, 0);
5476     Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5477     Ops.pop_back();
5478 
5479     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
5480                        BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
5481                           ? Intrinsic::aarch64_neon_sqadd
5482                           : Intrinsic::aarch64_neon_sqsub;
5483     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
5484   }
5485   case NEON::BI__builtin_neon_vqdmlals_s32:
5486   case NEON::BI__builtin_neon_vqdmlsls_s32: {
5487     SmallVector<Value *, 2> ProductOps;
5488     ProductOps.push_back(Ops[1]);
5489     ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
5490     Ops[1] =
5491         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5492                      ProductOps, "vqdmlXl");
5493 
5494     unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
5495                                         ? Intrinsic::aarch64_neon_sqadd
5496                                         : Intrinsic::aarch64_neon_sqsub;
5497     return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
5498   }
5499   case NEON::BI__builtin_neon_vqdmlals_lane_s32:
5500   case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
5501   case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
5502   case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
5503     Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5504                                           "lane");
5505     SmallVector<Value *, 2> ProductOps;
5506     ProductOps.push_back(Ops[1]);
5507     ProductOps.push_back(Ops[2]);
5508     Ops[1] =
5509         EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
5510                      ProductOps, "vqdmlXl");
5511     Ops.pop_back();
5512 
5513     unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
5514                        BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
5515                           ? Intrinsic::aarch64_neon_sqadd
5516                           : Intrinsic::aarch64_neon_sqsub;
5517     return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
5518   }
5519   }
5520 
5521   llvm::VectorType *VTy = GetNeonType(this, Type);
5522   llvm::Type *Ty = VTy;
5523   if (!Ty)
5524     return nullptr;
5525 
5526   // Not all intrinsics handled by the common case work for AArch64 yet, so only
5527   // defer to common code if it's been added to our special map.
5528   Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
5529                                    AArch64SIMDIntrinsicsProvenSorted);
5530 
5531   if (Builtin)
5532     return EmitCommonNeonBuiltinExpr(
5533         Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
5534         Builtin->NameHint, Builtin->TypeModifier, E, Ops,
5535         /*never use addresses*/ Address::invalid(), Address::invalid());
5536 
5537   if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
5538     return V;
5539 
5540   unsigned Int;
5541   switch (BuiltinID) {
5542   default: return nullptr;
5543   case NEON::BI__builtin_neon_vbsl_v:
5544   case NEON::BI__builtin_neon_vbslq_v: {
5545     llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
5546     Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
5547     Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
5548     Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
5549 
5550     Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
5551     Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
5552     Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
5553     return Builder.CreateBitCast(Ops[0], Ty);
5554   }
5555   case NEON::BI__builtin_neon_vfma_lane_v:
5556   case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
5557     // The ARM builtins (and instructions) have the addend as the first
5558     // operand, but the 'fma' intrinsics have it last. Swap it around here.
5559     Value *Addend = Ops[0];
5560     Value *Multiplicand = Ops[1];
5561     Value *LaneSource = Ops[2];
5562     Ops[0] = Multiplicand;
5563     Ops[1] = LaneSource;
5564     Ops[2] = Addend;
5565 
5566     // Now adjust things to handle the lane access.
5567     llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
5568       llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
5569       VTy;
5570     llvm::Constant *cst = cast<Constant>(Ops[3]);
5571     Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
5572     Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
5573     Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
5574 
5575     Ops.pop_back();
5576     Int = Intrinsic::fma;
5577     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
5578   }
5579   case NEON::BI__builtin_neon_vfma_laneq_v: {
5580     llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
5581     // v1f64 fma should be mapped to Neon scalar f64 fma
5582     if (VTy && VTy->getElementType() == DoubleTy) {
5583       Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5584       Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5585       llvm::Type *VTy = GetNeonType(this,
5586         NeonTypeFlags(NeonTypeFlags::Float64, false, true));
5587       Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
5588       Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5589       Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
5590       Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5591       return Builder.CreateBitCast(Result, Ty);
5592     }
5593     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5594     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5595     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5596 
5597     llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
5598                                             VTy->getNumElements() * 2);
5599     Ops[2] = Builder.CreateBitCast(Ops[2], STy);
5600     Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
5601                                                cast<ConstantInt>(Ops[3]));
5602     Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
5603 
5604     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5605   }
5606   case NEON::BI__builtin_neon_vfmaq_laneq_v: {
5607     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5608     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5609     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5610 
5611     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5612     Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
5613     return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
5614   }
5615   case NEON::BI__builtin_neon_vfmas_lane_f32:
5616   case NEON::BI__builtin_neon_vfmas_laneq_f32:
5617   case NEON::BI__builtin_neon_vfmad_lane_f64:
5618   case NEON::BI__builtin_neon_vfmad_laneq_f64: {
5619     Ops.push_back(EmitScalarExpr(E->getArg(3)));
5620     llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5621     Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5622     Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
5623     return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5624   }
5625   case NEON::BI__builtin_neon_vmull_v:
5626     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5627     Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
5628     if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
5629     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5630   case NEON::BI__builtin_neon_vmax_v:
5631   case NEON::BI__builtin_neon_vmaxq_v:
5632     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5633     Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
5634     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
5635     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
5636   case NEON::BI__builtin_neon_vmin_v:
5637   case NEON::BI__builtin_neon_vminq_v:
5638     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5639     Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
5640     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
5641     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
5642   case NEON::BI__builtin_neon_vabd_v:
5643   case NEON::BI__builtin_neon_vabdq_v:
5644     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5645     Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
5646     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
5647     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
5648   case NEON::BI__builtin_neon_vpadal_v:
5649   case NEON::BI__builtin_neon_vpadalq_v: {
5650     unsigned ArgElts = VTy->getNumElements();
5651     llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
5652     unsigned BitWidth = EltTy->getBitWidth();
5653     llvm::Type *ArgTy = llvm::VectorType::get(
5654         llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
5655     llvm::Type* Tys[2] = { VTy, ArgTy };
5656     Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
5657     SmallVector<llvm::Value*, 1> TmpOps;
5658     TmpOps.push_back(Ops[1]);
5659     Function *F = CGM.getIntrinsic(Int, Tys);
5660     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
5661     llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
5662     return Builder.CreateAdd(tmp, addend);
5663   }
5664   case NEON::BI__builtin_neon_vpmin_v:
5665   case NEON::BI__builtin_neon_vpminq_v:
5666     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5667     Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
5668     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
5669     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
5670   case NEON::BI__builtin_neon_vpmax_v:
5671   case NEON::BI__builtin_neon_vpmaxq_v:
5672     // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
5673     Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
5674     if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
5675     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
5676   case NEON::BI__builtin_neon_vminnm_v:
5677   case NEON::BI__builtin_neon_vminnmq_v:
5678     Int = Intrinsic::aarch64_neon_fminnm;
5679     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
5680   case NEON::BI__builtin_neon_vmaxnm_v:
5681   case NEON::BI__builtin_neon_vmaxnmq_v:
5682     Int = Intrinsic::aarch64_neon_fmaxnm;
5683     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
5684   case NEON::BI__builtin_neon_vrecpss_f32: {
5685     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5686     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
5687                         Ops, "vrecps");
5688   }
5689   case NEON::BI__builtin_neon_vrecpsd_f64: {
5690     Ops.push_back(EmitScalarExpr(E->getArg(1)));
5691     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
5692                         Ops, "vrecps");
5693   }
5694   case NEON::BI__builtin_neon_vqshrun_n_v:
5695     Int = Intrinsic::aarch64_neon_sqshrun;
5696     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
5697   case NEON::BI__builtin_neon_vqrshrun_n_v:
5698     Int = Intrinsic::aarch64_neon_sqrshrun;
5699     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
5700   case NEON::BI__builtin_neon_vqshrn_n_v:
5701     Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
5702     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
5703   case NEON::BI__builtin_neon_vrshrn_n_v:
5704     Int = Intrinsic::aarch64_neon_rshrn;
5705     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
5706   case NEON::BI__builtin_neon_vqrshrn_n_v:
5707     Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
5708     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
5709   case NEON::BI__builtin_neon_vrnda_v:
5710   case NEON::BI__builtin_neon_vrndaq_v: {
5711     Int = Intrinsic::round;
5712     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
5713   }
5714   case NEON::BI__builtin_neon_vrndi_v:
5715   case NEON::BI__builtin_neon_vrndiq_v: {
5716     Int = Intrinsic::nearbyint;
5717     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
5718   }
5719   case NEON::BI__builtin_neon_vrndm_v:
5720   case NEON::BI__builtin_neon_vrndmq_v: {
5721     Int = Intrinsic::floor;
5722     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
5723   }
5724   case NEON::BI__builtin_neon_vrndn_v:
5725   case NEON::BI__builtin_neon_vrndnq_v: {
5726     Int = Intrinsic::aarch64_neon_frintn;
5727     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
5728   }
5729   case NEON::BI__builtin_neon_vrndp_v:
5730   case NEON::BI__builtin_neon_vrndpq_v: {
5731     Int = Intrinsic::ceil;
5732     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
5733   }
5734   case NEON::BI__builtin_neon_vrndx_v:
5735   case NEON::BI__builtin_neon_vrndxq_v: {
5736     Int = Intrinsic::rint;
5737     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
5738   }
5739   case NEON::BI__builtin_neon_vrnd_v:
5740   case NEON::BI__builtin_neon_vrndq_v: {
5741     Int = Intrinsic::trunc;
5742     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
5743   }
5744   case NEON::BI__builtin_neon_vceqz_v:
5745   case NEON::BI__builtin_neon_vceqzq_v:
5746     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5747                                          ICmpInst::ICMP_EQ, "vceqz");
5748   case NEON::BI__builtin_neon_vcgez_v:
5749   case NEON::BI__builtin_neon_vcgezq_v:
5750     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5751                                          ICmpInst::ICMP_SGE, "vcgez");
5752   case NEON::BI__builtin_neon_vclez_v:
5753   case NEON::BI__builtin_neon_vclezq_v:
5754     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5755                                          ICmpInst::ICMP_SLE, "vclez");
5756   case NEON::BI__builtin_neon_vcgtz_v:
5757   case NEON::BI__builtin_neon_vcgtzq_v:
5758     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5759                                          ICmpInst::ICMP_SGT, "vcgtz");
5760   case NEON::BI__builtin_neon_vcltz_v:
5761   case NEON::BI__builtin_neon_vcltzq_v:
5762     return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5763                                          ICmpInst::ICMP_SLT, "vcltz");
5764   case NEON::BI__builtin_neon_vcvt_f64_v:
5765   case NEON::BI__builtin_neon_vcvtq_f64_v:
5766     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5767     Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
5768     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5769                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5770   case NEON::BI__builtin_neon_vcvt_f64_f32: {
5771     assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
5772            "unexpected vcvt_f64_f32 builtin");
5773     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
5774     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5775 
5776     return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
5777   }
5778   case NEON::BI__builtin_neon_vcvt_f32_f64: {
5779     assert(Type.getEltType() == NeonTypeFlags::Float32 &&
5780            "unexpected vcvt_f32_f64 builtin");
5781     NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
5782     Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
5783 
5784     return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
5785   }
5786   case NEON::BI__builtin_neon_vcvt_s32_v:
5787   case NEON::BI__builtin_neon_vcvt_u32_v:
5788   case NEON::BI__builtin_neon_vcvt_s64_v:
5789   case NEON::BI__builtin_neon_vcvt_u64_v:
5790   case NEON::BI__builtin_neon_vcvtq_s32_v:
5791   case NEON::BI__builtin_neon_vcvtq_u32_v:
5792   case NEON::BI__builtin_neon_vcvtq_s64_v:
5793   case NEON::BI__builtin_neon_vcvtq_u64_v: {
5794     Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5795     if (usgn)
5796       return Builder.CreateFPToUI(Ops[0], Ty);
5797     return Builder.CreateFPToSI(Ops[0], Ty);
5798   }
5799   case NEON::BI__builtin_neon_vcvta_s32_v:
5800   case NEON::BI__builtin_neon_vcvtaq_s32_v:
5801   case NEON::BI__builtin_neon_vcvta_u32_v:
5802   case NEON::BI__builtin_neon_vcvtaq_u32_v:
5803   case NEON::BI__builtin_neon_vcvta_s64_v:
5804   case NEON::BI__builtin_neon_vcvtaq_s64_v:
5805   case NEON::BI__builtin_neon_vcvta_u64_v:
5806   case NEON::BI__builtin_neon_vcvtaq_u64_v: {
5807     Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
5808     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5809     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
5810   }
5811   case NEON::BI__builtin_neon_vcvtm_s32_v:
5812   case NEON::BI__builtin_neon_vcvtmq_s32_v:
5813   case NEON::BI__builtin_neon_vcvtm_u32_v:
5814   case NEON::BI__builtin_neon_vcvtmq_u32_v:
5815   case NEON::BI__builtin_neon_vcvtm_s64_v:
5816   case NEON::BI__builtin_neon_vcvtmq_s64_v:
5817   case NEON::BI__builtin_neon_vcvtm_u64_v:
5818   case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5819     Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
5820     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5821     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
5822   }
5823   case NEON::BI__builtin_neon_vcvtn_s32_v:
5824   case NEON::BI__builtin_neon_vcvtnq_s32_v:
5825   case NEON::BI__builtin_neon_vcvtn_u32_v:
5826   case NEON::BI__builtin_neon_vcvtnq_u32_v:
5827   case NEON::BI__builtin_neon_vcvtn_s64_v:
5828   case NEON::BI__builtin_neon_vcvtnq_s64_v:
5829   case NEON::BI__builtin_neon_vcvtn_u64_v:
5830   case NEON::BI__builtin_neon_vcvtnq_u64_v: {
5831     Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
5832     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5833     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
5834   }
5835   case NEON::BI__builtin_neon_vcvtp_s32_v:
5836   case NEON::BI__builtin_neon_vcvtpq_s32_v:
5837   case NEON::BI__builtin_neon_vcvtp_u32_v:
5838   case NEON::BI__builtin_neon_vcvtpq_u32_v:
5839   case NEON::BI__builtin_neon_vcvtp_s64_v:
5840   case NEON::BI__builtin_neon_vcvtpq_s64_v:
5841   case NEON::BI__builtin_neon_vcvtp_u64_v:
5842   case NEON::BI__builtin_neon_vcvtpq_u64_v: {
5843     Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
5844     llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5845     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
5846   }
5847   case NEON::BI__builtin_neon_vmulx_v:
5848   case NEON::BI__builtin_neon_vmulxq_v: {
5849     Int = Intrinsic::aarch64_neon_fmulx;
5850     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
5851   }
5852   case NEON::BI__builtin_neon_vmul_lane_v:
5853   case NEON::BI__builtin_neon_vmul_laneq_v: {
5854     // v1f64 vmul_lane should be mapped to Neon scalar mul lane
5855     bool Quad = false;
5856     if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
5857       Quad = true;
5858     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5859     llvm::Type *VTy = GetNeonType(this,
5860       NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
5861     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
5862     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
5863     Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
5864     return Builder.CreateBitCast(Result, Ty);
5865   }
5866   case NEON::BI__builtin_neon_vnegd_s64:
5867     return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
5868   case NEON::BI__builtin_neon_vpmaxnm_v:
5869   case NEON::BI__builtin_neon_vpmaxnmq_v: {
5870     Int = Intrinsic::aarch64_neon_fmaxnmp;
5871     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
5872   }
5873   case NEON::BI__builtin_neon_vpminnm_v:
5874   case NEON::BI__builtin_neon_vpminnmq_v: {
5875     Int = Intrinsic::aarch64_neon_fminnmp;
5876     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
5877   }
5878   case NEON::BI__builtin_neon_vsqrt_v:
5879   case NEON::BI__builtin_neon_vsqrtq_v: {
5880     Int = Intrinsic::sqrt;
5881     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5882     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
5883   }
5884   case NEON::BI__builtin_neon_vrbit_v:
5885   case NEON::BI__builtin_neon_vrbitq_v: {
5886     Int = Intrinsic::aarch64_neon_rbit;
5887     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
5888   }
5889   case NEON::BI__builtin_neon_vaddv_u8:
5890     // FIXME: These are handled by the AArch64 scalar code.
5891     usgn = true;
5892     // FALLTHROUGH
5893   case NEON::BI__builtin_neon_vaddv_s8: {
5894     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5895     Ty = Int32Ty;
5896     VTy = llvm::VectorType::get(Int8Ty, 8);
5897     llvm::Type *Tys[2] = { Ty, VTy };
5898     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5899     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5900     return Builder.CreateTrunc(Ops[0], Int8Ty);
5901   }
5902   case NEON::BI__builtin_neon_vaddv_u16:
5903     usgn = true;
5904     // FALLTHROUGH
5905   case NEON::BI__builtin_neon_vaddv_s16: {
5906     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5907     Ty = Int32Ty;
5908     VTy = llvm::VectorType::get(Int16Ty, 4);
5909     llvm::Type *Tys[2] = { Ty, VTy };
5910     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5911     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5912     return Builder.CreateTrunc(Ops[0], Int16Ty);
5913   }
5914   case NEON::BI__builtin_neon_vaddvq_u8:
5915     usgn = true;
5916     // FALLTHROUGH
5917   case NEON::BI__builtin_neon_vaddvq_s8: {
5918     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5919     Ty = Int32Ty;
5920     VTy = llvm::VectorType::get(Int8Ty, 16);
5921     llvm::Type *Tys[2] = { Ty, VTy };
5922     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5923     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5924     return Builder.CreateTrunc(Ops[0], Int8Ty);
5925   }
5926   case NEON::BI__builtin_neon_vaddvq_u16:
5927     usgn = true;
5928     // FALLTHROUGH
5929   case NEON::BI__builtin_neon_vaddvq_s16: {
5930     Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
5931     Ty = Int32Ty;
5932     VTy = llvm::VectorType::get(Int16Ty, 8);
5933     llvm::Type *Tys[2] = { Ty, VTy };
5934     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5935     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
5936     return Builder.CreateTrunc(Ops[0], Int16Ty);
5937   }
5938   case NEON::BI__builtin_neon_vmaxv_u8: {
5939     Int = Intrinsic::aarch64_neon_umaxv;
5940     Ty = Int32Ty;
5941     VTy = llvm::VectorType::get(Int8Ty, 8);
5942     llvm::Type *Tys[2] = { Ty, VTy };
5943     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5944     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5945     return Builder.CreateTrunc(Ops[0], Int8Ty);
5946   }
5947   case NEON::BI__builtin_neon_vmaxv_u16: {
5948     Int = Intrinsic::aarch64_neon_umaxv;
5949     Ty = Int32Ty;
5950     VTy = llvm::VectorType::get(Int16Ty, 4);
5951     llvm::Type *Tys[2] = { Ty, VTy };
5952     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5953     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5954     return Builder.CreateTrunc(Ops[0], Int16Ty);
5955   }
5956   case NEON::BI__builtin_neon_vmaxvq_u8: {
5957     Int = Intrinsic::aarch64_neon_umaxv;
5958     Ty = Int32Ty;
5959     VTy = llvm::VectorType::get(Int8Ty, 16);
5960     llvm::Type *Tys[2] = { Ty, VTy };
5961     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5962     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5963     return Builder.CreateTrunc(Ops[0], Int8Ty);
5964   }
5965   case NEON::BI__builtin_neon_vmaxvq_u16: {
5966     Int = Intrinsic::aarch64_neon_umaxv;
5967     Ty = Int32Ty;
5968     VTy = llvm::VectorType::get(Int16Ty, 8);
5969     llvm::Type *Tys[2] = { Ty, VTy };
5970     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5971     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5972     return Builder.CreateTrunc(Ops[0], Int16Ty);
5973   }
5974   case NEON::BI__builtin_neon_vmaxv_s8: {
5975     Int = Intrinsic::aarch64_neon_smaxv;
5976     Ty = Int32Ty;
5977     VTy = llvm::VectorType::get(Int8Ty, 8);
5978     llvm::Type *Tys[2] = { Ty, VTy };
5979     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5980     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5981     return Builder.CreateTrunc(Ops[0], Int8Ty);
5982   }
5983   case NEON::BI__builtin_neon_vmaxv_s16: {
5984     Int = Intrinsic::aarch64_neon_smaxv;
5985     Ty = Int32Ty;
5986     VTy = llvm::VectorType::get(Int16Ty, 4);
5987     llvm::Type *Tys[2] = { Ty, VTy };
5988     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5989     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5990     return Builder.CreateTrunc(Ops[0], Int16Ty);
5991   }
5992   case NEON::BI__builtin_neon_vmaxvq_s8: {
5993     Int = Intrinsic::aarch64_neon_smaxv;
5994     Ty = Int32Ty;
5995     VTy = llvm::VectorType::get(Int8Ty, 16);
5996     llvm::Type *Tys[2] = { Ty, VTy };
5997     Ops.push_back(EmitScalarExpr(E->getArg(0)));
5998     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
5999     return Builder.CreateTrunc(Ops[0], Int8Ty);
6000   }
6001   case NEON::BI__builtin_neon_vmaxvq_s16: {
6002     Int = Intrinsic::aarch64_neon_smaxv;
6003     Ty = Int32Ty;
6004     VTy = llvm::VectorType::get(Int16Ty, 8);
6005     llvm::Type *Tys[2] = { Ty, VTy };
6006     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6007     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6008     return Builder.CreateTrunc(Ops[0], Int16Ty);
6009   }
6010   case NEON::BI__builtin_neon_vminv_u8: {
6011     Int = Intrinsic::aarch64_neon_uminv;
6012     Ty = Int32Ty;
6013     VTy = llvm::VectorType::get(Int8Ty, 8);
6014     llvm::Type *Tys[2] = { Ty, VTy };
6015     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6016     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6017     return Builder.CreateTrunc(Ops[0], Int8Ty);
6018   }
6019   case NEON::BI__builtin_neon_vminv_u16: {
6020     Int = Intrinsic::aarch64_neon_uminv;
6021     Ty = Int32Ty;
6022     VTy = llvm::VectorType::get(Int16Ty, 4);
6023     llvm::Type *Tys[2] = { Ty, VTy };
6024     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6025     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6026     return Builder.CreateTrunc(Ops[0], Int16Ty);
6027   }
6028   case NEON::BI__builtin_neon_vminvq_u8: {
6029     Int = Intrinsic::aarch64_neon_uminv;
6030     Ty = Int32Ty;
6031     VTy = llvm::VectorType::get(Int8Ty, 16);
6032     llvm::Type *Tys[2] = { Ty, VTy };
6033     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6034     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6035     return Builder.CreateTrunc(Ops[0], Int8Ty);
6036   }
6037   case NEON::BI__builtin_neon_vminvq_u16: {
6038     Int = Intrinsic::aarch64_neon_uminv;
6039     Ty = Int32Ty;
6040     VTy = llvm::VectorType::get(Int16Ty, 8);
6041     llvm::Type *Tys[2] = { Ty, VTy };
6042     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6043     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6044     return Builder.CreateTrunc(Ops[0], Int16Ty);
6045   }
6046   case NEON::BI__builtin_neon_vminv_s8: {
6047     Int = Intrinsic::aarch64_neon_sminv;
6048     Ty = Int32Ty;
6049     VTy = llvm::VectorType::get(Int8Ty, 8);
6050     llvm::Type *Tys[2] = { Ty, VTy };
6051     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6052     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6053     return Builder.CreateTrunc(Ops[0], Int8Ty);
6054   }
6055   case NEON::BI__builtin_neon_vminv_s16: {
6056     Int = Intrinsic::aarch64_neon_sminv;
6057     Ty = Int32Ty;
6058     VTy = llvm::VectorType::get(Int16Ty, 4);
6059     llvm::Type *Tys[2] = { Ty, VTy };
6060     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6061     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6062     return Builder.CreateTrunc(Ops[0], Int16Ty);
6063   }
6064   case NEON::BI__builtin_neon_vminvq_s8: {
6065     Int = Intrinsic::aarch64_neon_sminv;
6066     Ty = Int32Ty;
6067     VTy = llvm::VectorType::get(Int8Ty, 16);
6068     llvm::Type *Tys[2] = { Ty, VTy };
6069     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6070     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6071     return Builder.CreateTrunc(Ops[0], Int8Ty);
6072   }
6073   case NEON::BI__builtin_neon_vminvq_s16: {
6074     Int = Intrinsic::aarch64_neon_sminv;
6075     Ty = Int32Ty;
6076     VTy = llvm::VectorType::get(Int16Ty, 8);
6077     llvm::Type *Tys[2] = { Ty, VTy };
6078     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6079     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6080     return Builder.CreateTrunc(Ops[0], Int16Ty);
6081   }
6082   case NEON::BI__builtin_neon_vmul_n_f64: {
6083     Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6084     Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6085     return Builder.CreateFMul(Ops[0], RHS);
6086   }
6087   case NEON::BI__builtin_neon_vaddlv_u8: {
6088     Int = Intrinsic::aarch64_neon_uaddlv;
6089     Ty = Int32Ty;
6090     VTy = llvm::VectorType::get(Int8Ty, 8);
6091     llvm::Type *Tys[2] = { Ty, VTy };
6092     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6093     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6094     return Builder.CreateTrunc(Ops[0], Int16Ty);
6095   }
6096   case NEON::BI__builtin_neon_vaddlv_u16: {
6097     Int = Intrinsic::aarch64_neon_uaddlv;
6098     Ty = Int32Ty;
6099     VTy = llvm::VectorType::get(Int16Ty, 4);
6100     llvm::Type *Tys[2] = { Ty, VTy };
6101     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6102     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6103   }
6104   case NEON::BI__builtin_neon_vaddlvq_u8: {
6105     Int = Intrinsic::aarch64_neon_uaddlv;
6106     Ty = Int32Ty;
6107     VTy = llvm::VectorType::get(Int8Ty, 16);
6108     llvm::Type *Tys[2] = { Ty, VTy };
6109     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6110     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6111     return Builder.CreateTrunc(Ops[0], Int16Ty);
6112   }
6113   case NEON::BI__builtin_neon_vaddlvq_u16: {
6114     Int = Intrinsic::aarch64_neon_uaddlv;
6115     Ty = Int32Ty;
6116     VTy = llvm::VectorType::get(Int16Ty, 8);
6117     llvm::Type *Tys[2] = { Ty, VTy };
6118     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6119     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6120   }
6121   case NEON::BI__builtin_neon_vaddlv_s8: {
6122     Int = Intrinsic::aarch64_neon_saddlv;
6123     Ty = Int32Ty;
6124     VTy = llvm::VectorType::get(Int8Ty, 8);
6125     llvm::Type *Tys[2] = { Ty, VTy };
6126     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6127     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6128     return Builder.CreateTrunc(Ops[0], Int16Ty);
6129   }
6130   case NEON::BI__builtin_neon_vaddlv_s16: {
6131     Int = Intrinsic::aarch64_neon_saddlv;
6132     Ty = Int32Ty;
6133     VTy = llvm::VectorType::get(Int16Ty, 4);
6134     llvm::Type *Tys[2] = { Ty, VTy };
6135     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6136     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6137   }
6138   case NEON::BI__builtin_neon_vaddlvq_s8: {
6139     Int = Intrinsic::aarch64_neon_saddlv;
6140     Ty = Int32Ty;
6141     VTy = llvm::VectorType::get(Int8Ty, 16);
6142     llvm::Type *Tys[2] = { Ty, VTy };
6143     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6144     Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6145     return Builder.CreateTrunc(Ops[0], Int16Ty);
6146   }
6147   case NEON::BI__builtin_neon_vaddlvq_s16: {
6148     Int = Intrinsic::aarch64_neon_saddlv;
6149     Ty = Int32Ty;
6150     VTy = llvm::VectorType::get(Int16Ty, 8);
6151     llvm::Type *Tys[2] = { Ty, VTy };
6152     Ops.push_back(EmitScalarExpr(E->getArg(0)));
6153     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6154   }
6155   case NEON::BI__builtin_neon_vsri_n_v:
6156   case NEON::BI__builtin_neon_vsriq_n_v: {
6157     Int = Intrinsic::aarch64_neon_vsri;
6158     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6159     return EmitNeonCall(Intrin, Ops, "vsri_n");
6160   }
6161   case NEON::BI__builtin_neon_vsli_n_v:
6162   case NEON::BI__builtin_neon_vsliq_n_v: {
6163     Int = Intrinsic::aarch64_neon_vsli;
6164     llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6165     return EmitNeonCall(Intrin, Ops, "vsli_n");
6166   }
6167   case NEON::BI__builtin_neon_vsra_n_v:
6168   case NEON::BI__builtin_neon_vsraq_n_v:
6169     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6170     Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6171     return Builder.CreateAdd(Ops[0], Ops[1]);
6172   case NEON::BI__builtin_neon_vrsra_n_v:
6173   case NEON::BI__builtin_neon_vrsraq_n_v: {
6174     Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6175     SmallVector<llvm::Value*,2> TmpOps;
6176     TmpOps.push_back(Ops[1]);
6177     TmpOps.push_back(Ops[2]);
6178     Function* F = CGM.getIntrinsic(Int, Ty);
6179     llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6180     Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6181     return Builder.CreateAdd(Ops[0], tmp);
6182   }
6183     // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6184     // of an Align parameter here.
6185   case NEON::BI__builtin_neon_vld1_x2_v:
6186   case NEON::BI__builtin_neon_vld1q_x2_v:
6187   case NEON::BI__builtin_neon_vld1_x3_v:
6188   case NEON::BI__builtin_neon_vld1q_x3_v:
6189   case NEON::BI__builtin_neon_vld1_x4_v:
6190   case NEON::BI__builtin_neon_vld1q_x4_v: {
6191     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6192     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6193     llvm::Type *Tys[2] = { VTy, PTy };
6194     unsigned Int;
6195     switch (BuiltinID) {
6196     case NEON::BI__builtin_neon_vld1_x2_v:
6197     case NEON::BI__builtin_neon_vld1q_x2_v:
6198       Int = Intrinsic::aarch64_neon_ld1x2;
6199       break;
6200     case NEON::BI__builtin_neon_vld1_x3_v:
6201     case NEON::BI__builtin_neon_vld1q_x3_v:
6202       Int = Intrinsic::aarch64_neon_ld1x3;
6203       break;
6204     case NEON::BI__builtin_neon_vld1_x4_v:
6205     case NEON::BI__builtin_neon_vld1q_x4_v:
6206       Int = Intrinsic::aarch64_neon_ld1x4;
6207       break;
6208     }
6209     Function *F = CGM.getIntrinsic(Int, Tys);
6210     Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6211     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6212     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6213     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6214   }
6215   case NEON::BI__builtin_neon_vst1_x2_v:
6216   case NEON::BI__builtin_neon_vst1q_x2_v:
6217   case NEON::BI__builtin_neon_vst1_x3_v:
6218   case NEON::BI__builtin_neon_vst1q_x3_v:
6219   case NEON::BI__builtin_neon_vst1_x4_v:
6220   case NEON::BI__builtin_neon_vst1q_x4_v: {
6221     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6222     llvm::Type *Tys[2] = { VTy, PTy };
6223     unsigned Int;
6224     switch (BuiltinID) {
6225     case NEON::BI__builtin_neon_vst1_x2_v:
6226     case NEON::BI__builtin_neon_vst1q_x2_v:
6227       Int = Intrinsic::aarch64_neon_st1x2;
6228       break;
6229     case NEON::BI__builtin_neon_vst1_x3_v:
6230     case NEON::BI__builtin_neon_vst1q_x3_v:
6231       Int = Intrinsic::aarch64_neon_st1x3;
6232       break;
6233     case NEON::BI__builtin_neon_vst1_x4_v:
6234     case NEON::BI__builtin_neon_vst1q_x4_v:
6235       Int = Intrinsic::aarch64_neon_st1x4;
6236       break;
6237     }
6238     std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6239     return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6240   }
6241   case NEON::BI__builtin_neon_vld1_v:
6242   case NEON::BI__builtin_neon_vld1q_v:
6243     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6244     return Builder.CreateDefaultAlignedLoad(Ops[0]);
6245   case NEON::BI__builtin_neon_vst1_v:
6246   case NEON::BI__builtin_neon_vst1q_v:
6247     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6248     Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6249     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6250   case NEON::BI__builtin_neon_vld1_lane_v:
6251   case NEON::BI__builtin_neon_vld1q_lane_v:
6252     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6253     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6254     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6255     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6256     return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6257   case NEON::BI__builtin_neon_vld1_dup_v:
6258   case NEON::BI__builtin_neon_vld1q_dup_v: {
6259     Value *V = UndefValue::get(Ty);
6260     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6261     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6262     Ops[0] = Builder.CreateDefaultAlignedLoad(Ops[0]);
6263     llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6264     Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6265     return EmitNeonSplat(Ops[0], CI);
6266   }
6267   case NEON::BI__builtin_neon_vst1_lane_v:
6268   case NEON::BI__builtin_neon_vst1q_lane_v:
6269     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6270     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6271     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6272     return Builder.CreateDefaultAlignedStore(Ops[1],
6273                                              Builder.CreateBitCast(Ops[0], Ty));
6274   case NEON::BI__builtin_neon_vld2_v:
6275   case NEON::BI__builtin_neon_vld2q_v: {
6276     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6277     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6278     llvm::Type *Tys[2] = { VTy, PTy };
6279     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6280     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6281     Ops[0] = Builder.CreateBitCast(Ops[0],
6282                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6283     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6284   }
6285   case NEON::BI__builtin_neon_vld3_v:
6286   case NEON::BI__builtin_neon_vld3q_v: {
6287     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6288     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6289     llvm::Type *Tys[2] = { VTy, PTy };
6290     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6291     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6292     Ops[0] = Builder.CreateBitCast(Ops[0],
6293                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6294     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6295   }
6296   case NEON::BI__builtin_neon_vld4_v:
6297   case NEON::BI__builtin_neon_vld4q_v: {
6298     llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6299     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6300     llvm::Type *Tys[2] = { VTy, PTy };
6301     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6302     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6303     Ops[0] = Builder.CreateBitCast(Ops[0],
6304                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6305     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6306   }
6307   case NEON::BI__builtin_neon_vld2_dup_v:
6308   case NEON::BI__builtin_neon_vld2q_dup_v: {
6309     llvm::Type *PTy =
6310       llvm::PointerType::getUnqual(VTy->getElementType());
6311     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6312     llvm::Type *Tys[2] = { VTy, PTy };
6313     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6314     Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6315     Ops[0] = Builder.CreateBitCast(Ops[0],
6316                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6317     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6318   }
6319   case NEON::BI__builtin_neon_vld3_dup_v:
6320   case NEON::BI__builtin_neon_vld3q_dup_v: {
6321     llvm::Type *PTy =
6322       llvm::PointerType::getUnqual(VTy->getElementType());
6323     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6324     llvm::Type *Tys[2] = { VTy, PTy };
6325     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6326     Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6327     Ops[0] = Builder.CreateBitCast(Ops[0],
6328                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6329     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6330   }
6331   case NEON::BI__builtin_neon_vld4_dup_v:
6332   case NEON::BI__builtin_neon_vld4q_dup_v: {
6333     llvm::Type *PTy =
6334       llvm::PointerType::getUnqual(VTy->getElementType());
6335     Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6336     llvm::Type *Tys[2] = { VTy, PTy };
6337     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6338     Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6339     Ops[0] = Builder.CreateBitCast(Ops[0],
6340                 llvm::PointerType::getUnqual(Ops[1]->getType()));
6341     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6342   }
6343   case NEON::BI__builtin_neon_vld2_lane_v:
6344   case NEON::BI__builtin_neon_vld2q_lane_v: {
6345     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6346     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6347     Ops.push_back(Ops[1]);
6348     Ops.erase(Ops.begin()+1);
6349     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6350     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6351     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6352     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6353     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6354     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6355     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6356   }
6357   case NEON::BI__builtin_neon_vld3_lane_v:
6358   case NEON::BI__builtin_neon_vld3q_lane_v: {
6359     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6360     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6361     Ops.push_back(Ops[1]);
6362     Ops.erase(Ops.begin()+1);
6363     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6364     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6365     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6366     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6367     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6368     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6369     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6370     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6371   }
6372   case NEON::BI__builtin_neon_vld4_lane_v:
6373   case NEON::BI__builtin_neon_vld4q_lane_v: {
6374     llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6375     Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6376     Ops.push_back(Ops[1]);
6377     Ops.erase(Ops.begin()+1);
6378     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6379     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6380     Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6381     Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6382     Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6383     Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6384     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6385     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6386     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6387   }
6388   case NEON::BI__builtin_neon_vst2_v:
6389   case NEON::BI__builtin_neon_vst2q_v: {
6390     Ops.push_back(Ops[0]);
6391     Ops.erase(Ops.begin());
6392     llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6393     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6394                         Ops, "");
6395   }
6396   case NEON::BI__builtin_neon_vst2_lane_v:
6397   case NEON::BI__builtin_neon_vst2q_lane_v: {
6398     Ops.push_back(Ops[0]);
6399     Ops.erase(Ops.begin());
6400     Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6401     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6402     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6403                         Ops, "");
6404   }
6405   case NEON::BI__builtin_neon_vst3_v:
6406   case NEON::BI__builtin_neon_vst3q_v: {
6407     Ops.push_back(Ops[0]);
6408     Ops.erase(Ops.begin());
6409     llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6410     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6411                         Ops, "");
6412   }
6413   case NEON::BI__builtin_neon_vst3_lane_v:
6414   case NEON::BI__builtin_neon_vst3q_lane_v: {
6415     Ops.push_back(Ops[0]);
6416     Ops.erase(Ops.begin());
6417     Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6418     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6419     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6420                         Ops, "");
6421   }
6422   case NEON::BI__builtin_neon_vst4_v:
6423   case NEON::BI__builtin_neon_vst4q_v: {
6424     Ops.push_back(Ops[0]);
6425     Ops.erase(Ops.begin());
6426     llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6427     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6428                         Ops, "");
6429   }
6430   case NEON::BI__builtin_neon_vst4_lane_v:
6431   case NEON::BI__builtin_neon_vst4q_lane_v: {
6432     Ops.push_back(Ops[0]);
6433     Ops.erase(Ops.begin());
6434     Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6435     llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6436     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6437                         Ops, "");
6438   }
6439   case NEON::BI__builtin_neon_vtrn_v:
6440   case NEON::BI__builtin_neon_vtrnq_v: {
6441     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6442     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6443     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6444     Value *SV = nullptr;
6445 
6446     for (unsigned vi = 0; vi != 2; ++vi) {
6447       SmallVector<uint32_t, 16> Indices;
6448       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6449         Indices.push_back(i+vi);
6450         Indices.push_back(i+e+vi);
6451       }
6452       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6453       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6454       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6455     }
6456     return SV;
6457   }
6458   case NEON::BI__builtin_neon_vuzp_v:
6459   case NEON::BI__builtin_neon_vuzpq_v: {
6460     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6461     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6462     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6463     Value *SV = nullptr;
6464 
6465     for (unsigned vi = 0; vi != 2; ++vi) {
6466       SmallVector<uint32_t, 16> Indices;
6467       for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
6468         Indices.push_back(2*i+vi);
6469 
6470       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6471       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
6472       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6473     }
6474     return SV;
6475   }
6476   case NEON::BI__builtin_neon_vzip_v:
6477   case NEON::BI__builtin_neon_vzipq_v: {
6478     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6479     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6480     Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6481     Value *SV = nullptr;
6482 
6483     for (unsigned vi = 0; vi != 2; ++vi) {
6484       SmallVector<uint32_t, 16> Indices;
6485       for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6486         Indices.push_back((i + vi*e) >> 1);
6487         Indices.push_back(((i + vi*e) >> 1)+e);
6488       }
6489       Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6490       SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
6491       SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6492     }
6493     return SV;
6494   }
6495   case NEON::BI__builtin_neon_vqtbl1q_v: {
6496     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
6497                         Ops, "vtbl1");
6498   }
6499   case NEON::BI__builtin_neon_vqtbl2q_v: {
6500     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
6501                         Ops, "vtbl2");
6502   }
6503   case NEON::BI__builtin_neon_vqtbl3q_v: {
6504     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
6505                         Ops, "vtbl3");
6506   }
6507   case NEON::BI__builtin_neon_vqtbl4q_v: {
6508     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
6509                         Ops, "vtbl4");
6510   }
6511   case NEON::BI__builtin_neon_vqtbx1q_v: {
6512     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
6513                         Ops, "vtbx1");
6514   }
6515   case NEON::BI__builtin_neon_vqtbx2q_v: {
6516     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
6517                         Ops, "vtbx2");
6518   }
6519   case NEON::BI__builtin_neon_vqtbx3q_v: {
6520     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
6521                         Ops, "vtbx3");
6522   }
6523   case NEON::BI__builtin_neon_vqtbx4q_v: {
6524     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
6525                         Ops, "vtbx4");
6526   }
6527   case NEON::BI__builtin_neon_vsqadd_v:
6528   case NEON::BI__builtin_neon_vsqaddq_v: {
6529     Int = Intrinsic::aarch64_neon_usqadd;
6530     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
6531   }
6532   case NEON::BI__builtin_neon_vuqadd_v:
6533   case NEON::BI__builtin_neon_vuqaddq_v: {
6534     Int = Intrinsic::aarch64_neon_suqadd;
6535     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
6536   }
6537   }
6538 }
6539 
6540 llvm::Value *CodeGenFunction::
BuildVector(ArrayRef<llvm::Value * > Ops)6541 BuildVector(ArrayRef<llvm::Value*> Ops) {
6542   assert((Ops.size() & (Ops.size() - 1)) == 0 &&
6543          "Not a power-of-two sized vector!");
6544   bool AllConstants = true;
6545   for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
6546     AllConstants &= isa<Constant>(Ops[i]);
6547 
6548   // If this is a constant vector, create a ConstantVector.
6549   if (AllConstants) {
6550     SmallVector<llvm::Constant*, 16> CstOps;
6551     for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6552       CstOps.push_back(cast<Constant>(Ops[i]));
6553     return llvm::ConstantVector::get(CstOps);
6554   }
6555 
6556   // Otherwise, insertelement the values to build the vector.
6557   Value *Result =
6558     llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
6559 
6560   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
6561     Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
6562 
6563   return Result;
6564 }
6565 
6566 // Convert the mask from an integer type to a vector of i1.
getMaskVecValue(CodeGenFunction & CGF,Value * Mask,unsigned NumElts)6567 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
6568                               unsigned NumElts) {
6569 
6570   llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
6571                          cast<IntegerType>(Mask->getType())->getBitWidth());
6572   Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
6573 
6574   // If we have less than 8 elements, then the starting mask was an i8 and
6575   // we need to extract down to the right number of elements.
6576   if (NumElts < 8) {
6577     uint32_t Indices[4];
6578     for (unsigned i = 0; i != NumElts; ++i)
6579       Indices[i] = i;
6580     MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
6581                                              makeArrayRef(Indices, NumElts),
6582                                              "extract");
6583   }
6584   return MaskVec;
6585 }
6586 
EmitX86MaskedStore(CodeGenFunction & CGF,SmallVectorImpl<Value * > & Ops,unsigned Align)6587 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
6588                                  SmallVectorImpl<Value *> &Ops,
6589                                  unsigned Align) {
6590   // Cast the pointer to right type.
6591   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6592                                llvm::PointerType::getUnqual(Ops[1]->getType()));
6593 
6594   // If the mask is all ones just emit a regular store.
6595   if (const auto *C = dyn_cast<Constant>(Ops[2]))
6596     if (C->isAllOnesValue())
6597       return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
6598 
6599   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6600                                    Ops[1]->getType()->getVectorNumElements());
6601 
6602   return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
6603 }
6604 
EmitX86MaskedLoad(CodeGenFunction & CGF,SmallVectorImpl<Value * > & Ops,unsigned Align)6605 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
6606                                 SmallVectorImpl<Value *> &Ops, unsigned Align) {
6607   // Cast the pointer to right type.
6608   Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
6609                                llvm::PointerType::getUnqual(Ops[1]->getType()));
6610 
6611   // If the mask is all ones just emit a regular store.
6612   if (const auto *C = dyn_cast<Constant>(Ops[2]))
6613     if (C->isAllOnesValue())
6614       return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
6615 
6616   Value *MaskVec = getMaskVecValue(CGF, Ops[2],
6617                                    Ops[1]->getType()->getVectorNumElements());
6618 
6619   return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
6620 }
6621 
EmitX86Select(CodeGenFunction & CGF,Value * Mask,Value * Op0,Value * Op1)6622 static Value *EmitX86Select(CodeGenFunction &CGF,
6623                             Value *Mask, Value *Op0, Value *Op1) {
6624 
6625   // If the mask is all ones just return first argument.
6626   if (const auto *C = dyn_cast<Constant>(Mask))
6627     if (C->isAllOnesValue())
6628       return Op0;
6629 
6630   Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
6631 
6632   return CGF.Builder.CreateSelect(Mask, Op0, Op1);
6633 }
6634 
EmitX86MaskedCompare(CodeGenFunction & CGF,unsigned CC,bool Signed,SmallVectorImpl<Value * > & Ops)6635 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
6636                                    bool Signed, SmallVectorImpl<Value *> &Ops) {
6637   unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
6638   Value *Cmp;
6639 
6640   if (CC == 3) {
6641     Cmp = Constant::getNullValue(
6642                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6643   } else if (CC == 7) {
6644     Cmp = Constant::getAllOnesValue(
6645                        llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
6646   } else {
6647     ICmpInst::Predicate Pred;
6648     switch (CC) {
6649     default: llvm_unreachable("Unknown condition code");
6650     case 0: Pred = ICmpInst::ICMP_EQ;  break;
6651     case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
6652     case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
6653     case 4: Pred = ICmpInst::ICMP_NE;  break;
6654     case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
6655     case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
6656     }
6657     Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
6658   }
6659 
6660   const auto *C = dyn_cast<Constant>(Ops.back());
6661   if (!C || !C->isAllOnesValue())
6662     Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
6663 
6664   if (NumElts < 8) {
6665     uint32_t Indices[8];
6666     for (unsigned i = 0; i != NumElts; ++i)
6667       Indices[i] = i;
6668     for (unsigned i = NumElts; i != 8; ++i)
6669       Indices[i] = i % NumElts + NumElts;
6670     Cmp = CGF.Builder.CreateShuffleVector(
6671         Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
6672   }
6673   return CGF.Builder.CreateBitCast(Cmp,
6674                                    IntegerType::get(CGF.getLLVMContext(),
6675                                                     std::max(NumElts, 8U)));
6676 }
6677 
EmitX86BuiltinExpr(unsigned BuiltinID,const CallExpr * E)6678 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
6679                                            const CallExpr *E) {
6680   if (BuiltinID == X86::BI__builtin_ms_va_start ||
6681       BuiltinID == X86::BI__builtin_ms_va_end)
6682     return EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
6683                           BuiltinID == X86::BI__builtin_ms_va_start);
6684   if (BuiltinID == X86::BI__builtin_ms_va_copy) {
6685     // Lower this manually. We can't reliably determine whether or not any
6686     // given va_copy() is for a Win64 va_list from the calling convention
6687     // alone, because it's legal to do this from a System V ABI function.
6688     // With opaque pointer types, we won't have enough information in LLVM
6689     // IR to determine this from the argument types, either. Best to do it
6690     // now, while we have enough information.
6691     Address DestAddr = EmitMSVAListRef(E->getArg(0));
6692     Address SrcAddr = EmitMSVAListRef(E->getArg(1));
6693 
6694     llvm::Type *BPP = Int8PtrPtrTy;
6695 
6696     DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
6697                        DestAddr.getAlignment());
6698     SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
6699                       SrcAddr.getAlignment());
6700 
6701     Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
6702     return Builder.CreateStore(ArgPtr, DestAddr);
6703   }
6704 
6705   SmallVector<Value*, 4> Ops;
6706 
6707   // Find out if any arguments are required to be integer constant expressions.
6708   unsigned ICEArguments = 0;
6709   ASTContext::GetBuiltinTypeError Error;
6710   getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6711   assert(Error == ASTContext::GE_None && "Should not codegen an error");
6712 
6713   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
6714     // If this is a normal argument, just emit it as a scalar.
6715     if ((ICEArguments & (1 << i)) == 0) {
6716       Ops.push_back(EmitScalarExpr(E->getArg(i)));
6717       continue;
6718     }
6719 
6720     // If this is required to be a constant, constant fold it so that we know
6721     // that the generated intrinsic gets a ConstantInt.
6722     llvm::APSInt Result;
6723     bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6724     assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6725     Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6726   }
6727 
6728   // These exist so that the builtin that takes an immediate can be bounds
6729   // checked by clang to avoid passing bad immediates to the backend. Since
6730   // AVX has a larger immediate than SSE we would need separate builtins to
6731   // do the different bounds checking. Rather than create a clang specific
6732   // SSE only builtin, this implements eight separate builtins to match gcc
6733   // implementation.
6734   auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
6735     Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
6736     llvm::Function *F = CGM.getIntrinsic(ID);
6737     return Builder.CreateCall(F, Ops);
6738   };
6739 
6740   // For the vector forms of FP comparisons, translate the builtins directly to
6741   // IR.
6742   // TODO: The builtins could be removed if the SSE header files used vector
6743   // extension comparisons directly (vector ordered/unordered may need
6744   // additional support via __builtin_isnan()).
6745   auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
6746     Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
6747     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
6748     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
6749     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
6750     return Builder.CreateBitCast(Sext, FPVecTy);
6751   };
6752 
6753   switch (BuiltinID) {
6754   default: return nullptr;
6755   case X86::BI__builtin_cpu_supports: {
6756     const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
6757     StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
6758 
6759     // TODO: When/if this becomes more than x86 specific then use a TargetInfo
6760     // based mapping.
6761     // Processor features and mapping to processor feature value.
6762     enum X86Features {
6763       CMOV = 0,
6764       MMX,
6765       POPCNT,
6766       SSE,
6767       SSE2,
6768       SSE3,
6769       SSSE3,
6770       SSE4_1,
6771       SSE4_2,
6772       AVX,
6773       AVX2,
6774       SSE4_A,
6775       FMA4,
6776       XOP,
6777       FMA,
6778       AVX512F,
6779       BMI,
6780       BMI2,
6781       AES,
6782       PCLMUL,
6783       AVX512VL,
6784       AVX512BW,
6785       AVX512DQ,
6786       AVX512CD,
6787       AVX512ER,
6788       AVX512PF,
6789       AVX512VBMI,
6790       AVX512IFMA,
6791       MAX
6792     };
6793 
6794     X86Features Feature = StringSwitch<X86Features>(FeatureStr)
6795                               .Case("cmov", X86Features::CMOV)
6796                               .Case("mmx", X86Features::MMX)
6797                               .Case("popcnt", X86Features::POPCNT)
6798                               .Case("sse", X86Features::SSE)
6799                               .Case("sse2", X86Features::SSE2)
6800                               .Case("sse3", X86Features::SSE3)
6801                               .Case("ssse3", X86Features::SSSE3)
6802                               .Case("sse4.1", X86Features::SSE4_1)
6803                               .Case("sse4.2", X86Features::SSE4_2)
6804                               .Case("avx", X86Features::AVX)
6805                               .Case("avx2", X86Features::AVX2)
6806                               .Case("sse4a", X86Features::SSE4_A)
6807                               .Case("fma4", X86Features::FMA4)
6808                               .Case("xop", X86Features::XOP)
6809                               .Case("fma", X86Features::FMA)
6810                               .Case("avx512f", X86Features::AVX512F)
6811                               .Case("bmi", X86Features::BMI)
6812                               .Case("bmi2", X86Features::BMI2)
6813                               .Case("aes", X86Features::AES)
6814                               .Case("pclmul", X86Features::PCLMUL)
6815                               .Case("avx512vl", X86Features::AVX512VL)
6816                               .Case("avx512bw", X86Features::AVX512BW)
6817                               .Case("avx512dq", X86Features::AVX512DQ)
6818                               .Case("avx512cd", X86Features::AVX512CD)
6819                               .Case("avx512er", X86Features::AVX512ER)
6820                               .Case("avx512pf", X86Features::AVX512PF)
6821                               .Case("avx512vbmi", X86Features::AVX512VBMI)
6822                               .Case("avx512ifma", X86Features::AVX512IFMA)
6823                               .Default(X86Features::MAX);
6824     assert(Feature != X86Features::MAX && "Invalid feature!");
6825 
6826     // Matching the struct layout from the compiler-rt/libgcc structure that is
6827     // filled in:
6828     // unsigned int __cpu_vendor;
6829     // unsigned int __cpu_type;
6830     // unsigned int __cpu_subtype;
6831     // unsigned int __cpu_features[1];
6832     llvm::Type *STy = llvm::StructType::get(
6833         Int32Ty, Int32Ty, Int32Ty, llvm::ArrayType::get(Int32Ty, 1), nullptr);
6834 
6835     // Grab the global __cpu_model.
6836     llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
6837 
6838     // Grab the first (0th) element from the field __cpu_features off of the
6839     // global in the struct STy.
6840     Value *Idxs[] = {
6841       ConstantInt::get(Int32Ty, 0),
6842       ConstantInt::get(Int32Ty, 3),
6843       ConstantInt::get(Int32Ty, 0)
6844     };
6845     Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
6846     Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
6847                                                 CharUnits::fromQuantity(4));
6848 
6849     // Check the value of the bit corresponding to the feature requested.
6850     Value *Bitset = Builder.CreateAnd(
6851         Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
6852     return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
6853   }
6854   case X86::BI_mm_prefetch: {
6855     Value *Address = Ops[0];
6856     Value *RW = ConstantInt::get(Int32Ty, 0);
6857     Value *Locality = Ops[1];
6858     Value *Data = ConstantInt::get(Int32Ty, 1);
6859     Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
6860     return Builder.CreateCall(F, {Address, RW, Locality, Data});
6861   }
6862   case X86::BI__builtin_ia32_undef128:
6863   case X86::BI__builtin_ia32_undef256:
6864   case X86::BI__builtin_ia32_undef512:
6865     return UndefValue::get(ConvertType(E->getType()));
6866   case X86::BI__builtin_ia32_vec_init_v8qi:
6867   case X86::BI__builtin_ia32_vec_init_v4hi:
6868   case X86::BI__builtin_ia32_vec_init_v2si:
6869     return Builder.CreateBitCast(BuildVector(Ops),
6870                                  llvm::Type::getX86_MMXTy(getLLVMContext()));
6871   case X86::BI__builtin_ia32_vec_ext_v2si:
6872     return Builder.CreateExtractElement(Ops[0],
6873                                   llvm::ConstantInt::get(Ops[1]->getType(), 0));
6874   case X86::BI__builtin_ia32_ldmxcsr: {
6875     Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6876     Builder.CreateStore(Ops[0], Tmp);
6877     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
6878                           Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6879   }
6880   case X86::BI__builtin_ia32_stmxcsr: {
6881     Address Tmp = CreateMemTemp(E->getType());
6882     Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
6883                        Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
6884     return Builder.CreateLoad(Tmp, "stmxcsr");
6885   }
6886   case X86::BI__builtin_ia32_xsave:
6887   case X86::BI__builtin_ia32_xsave64:
6888   case X86::BI__builtin_ia32_xrstor:
6889   case X86::BI__builtin_ia32_xrstor64:
6890   case X86::BI__builtin_ia32_xsaveopt:
6891   case X86::BI__builtin_ia32_xsaveopt64:
6892   case X86::BI__builtin_ia32_xrstors:
6893   case X86::BI__builtin_ia32_xrstors64:
6894   case X86::BI__builtin_ia32_xsavec:
6895   case X86::BI__builtin_ia32_xsavec64:
6896   case X86::BI__builtin_ia32_xsaves:
6897   case X86::BI__builtin_ia32_xsaves64: {
6898     Intrinsic::ID ID;
6899 #define INTRINSIC_X86_XSAVE_ID(NAME) \
6900     case X86::BI__builtin_ia32_##NAME: \
6901       ID = Intrinsic::x86_##NAME; \
6902       break
6903     switch (BuiltinID) {
6904     default: llvm_unreachable("Unsupported intrinsic!");
6905     INTRINSIC_X86_XSAVE_ID(xsave);
6906     INTRINSIC_X86_XSAVE_ID(xsave64);
6907     INTRINSIC_X86_XSAVE_ID(xrstor);
6908     INTRINSIC_X86_XSAVE_ID(xrstor64);
6909     INTRINSIC_X86_XSAVE_ID(xsaveopt);
6910     INTRINSIC_X86_XSAVE_ID(xsaveopt64);
6911     INTRINSIC_X86_XSAVE_ID(xrstors);
6912     INTRINSIC_X86_XSAVE_ID(xrstors64);
6913     INTRINSIC_X86_XSAVE_ID(xsavec);
6914     INTRINSIC_X86_XSAVE_ID(xsavec64);
6915     INTRINSIC_X86_XSAVE_ID(xsaves);
6916     INTRINSIC_X86_XSAVE_ID(xsaves64);
6917     }
6918 #undef INTRINSIC_X86_XSAVE_ID
6919     Value *Mhi = Builder.CreateTrunc(
6920       Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
6921     Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
6922     Ops[1] = Mhi;
6923     Ops.push_back(Mlo);
6924     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
6925   }
6926   case X86::BI__builtin_ia32_storedqudi128_mask:
6927   case X86::BI__builtin_ia32_storedqusi128_mask:
6928   case X86::BI__builtin_ia32_storedquhi128_mask:
6929   case X86::BI__builtin_ia32_storedquqi128_mask:
6930   case X86::BI__builtin_ia32_storeupd128_mask:
6931   case X86::BI__builtin_ia32_storeups128_mask:
6932   case X86::BI__builtin_ia32_storedqudi256_mask:
6933   case X86::BI__builtin_ia32_storedqusi256_mask:
6934   case X86::BI__builtin_ia32_storedquhi256_mask:
6935   case X86::BI__builtin_ia32_storedquqi256_mask:
6936   case X86::BI__builtin_ia32_storeupd256_mask:
6937   case X86::BI__builtin_ia32_storeups256_mask:
6938   case X86::BI__builtin_ia32_storedqudi512_mask:
6939   case X86::BI__builtin_ia32_storedqusi512_mask:
6940   case X86::BI__builtin_ia32_storedquhi512_mask:
6941   case X86::BI__builtin_ia32_storedquqi512_mask:
6942   case X86::BI__builtin_ia32_storeupd512_mask:
6943   case X86::BI__builtin_ia32_storeups512_mask:
6944     return EmitX86MaskedStore(*this, Ops, 1);
6945 
6946   case X86::BI__builtin_ia32_movdqa32store128_mask:
6947   case X86::BI__builtin_ia32_movdqa64store128_mask:
6948   case X86::BI__builtin_ia32_storeaps128_mask:
6949   case X86::BI__builtin_ia32_storeapd128_mask:
6950   case X86::BI__builtin_ia32_movdqa32store256_mask:
6951   case X86::BI__builtin_ia32_movdqa64store256_mask:
6952   case X86::BI__builtin_ia32_storeaps256_mask:
6953   case X86::BI__builtin_ia32_storeapd256_mask:
6954   case X86::BI__builtin_ia32_movdqa32store512_mask:
6955   case X86::BI__builtin_ia32_movdqa64store512_mask:
6956   case X86::BI__builtin_ia32_storeaps512_mask:
6957   case X86::BI__builtin_ia32_storeapd512_mask: {
6958     unsigned Align =
6959       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
6960     return EmitX86MaskedStore(*this, Ops, Align);
6961   }
6962   case X86::BI__builtin_ia32_loadups128_mask:
6963   case X86::BI__builtin_ia32_loadups256_mask:
6964   case X86::BI__builtin_ia32_loadups512_mask:
6965   case X86::BI__builtin_ia32_loadupd128_mask:
6966   case X86::BI__builtin_ia32_loadupd256_mask:
6967   case X86::BI__builtin_ia32_loadupd512_mask:
6968   case X86::BI__builtin_ia32_loaddquqi128_mask:
6969   case X86::BI__builtin_ia32_loaddquqi256_mask:
6970   case X86::BI__builtin_ia32_loaddquqi512_mask:
6971   case X86::BI__builtin_ia32_loaddquhi128_mask:
6972   case X86::BI__builtin_ia32_loaddquhi256_mask:
6973   case X86::BI__builtin_ia32_loaddquhi512_mask:
6974   case X86::BI__builtin_ia32_loaddqusi128_mask:
6975   case X86::BI__builtin_ia32_loaddqusi256_mask:
6976   case X86::BI__builtin_ia32_loaddqusi512_mask:
6977   case X86::BI__builtin_ia32_loaddqudi128_mask:
6978   case X86::BI__builtin_ia32_loaddqudi256_mask:
6979   case X86::BI__builtin_ia32_loaddqudi512_mask:
6980     return EmitX86MaskedLoad(*this, Ops, 1);
6981 
6982   case X86::BI__builtin_ia32_loadaps128_mask:
6983   case X86::BI__builtin_ia32_loadaps256_mask:
6984   case X86::BI__builtin_ia32_loadaps512_mask:
6985   case X86::BI__builtin_ia32_loadapd128_mask:
6986   case X86::BI__builtin_ia32_loadapd256_mask:
6987   case X86::BI__builtin_ia32_loadapd512_mask:
6988   case X86::BI__builtin_ia32_movdqa32load128_mask:
6989   case X86::BI__builtin_ia32_movdqa32load256_mask:
6990   case X86::BI__builtin_ia32_movdqa32load512_mask:
6991   case X86::BI__builtin_ia32_movdqa64load128_mask:
6992   case X86::BI__builtin_ia32_movdqa64load256_mask:
6993   case X86::BI__builtin_ia32_movdqa64load512_mask: {
6994     unsigned Align =
6995       getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
6996     return EmitX86MaskedLoad(*this, Ops, Align);
6997   }
6998   case X86::BI__builtin_ia32_storehps:
6999   case X86::BI__builtin_ia32_storelps: {
7000     llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7001     llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7002 
7003     // cast val v2i64
7004     Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7005 
7006     // extract (0, 1)
7007     unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7008     llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7009     Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7010 
7011     // cast pointer to i64 & store
7012     Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7013     return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7014   }
7015   case X86::BI__builtin_ia32_palignr128:
7016   case X86::BI__builtin_ia32_palignr256:
7017   case X86::BI__builtin_ia32_palignr128_mask:
7018   case X86::BI__builtin_ia32_palignr256_mask:
7019   case X86::BI__builtin_ia32_palignr512_mask: {
7020     unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7021 
7022     unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7023     assert(NumElts % 16 == 0);
7024 
7025     // If palignr is shifting the pair of vectors more than the size of two
7026     // lanes, emit zero.
7027     if (ShiftVal >= 32)
7028       return llvm::Constant::getNullValue(ConvertType(E->getType()));
7029 
7030     // If palignr is shifting the pair of input vectors more than one lane,
7031     // but less than two lanes, convert to shifting in zeroes.
7032     if (ShiftVal > 16) {
7033       ShiftVal -= 16;
7034       Ops[1] = Ops[0];
7035       Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7036     }
7037 
7038     uint32_t Indices[64];
7039     // 256-bit palignr operates on 128-bit lanes so we need to handle that
7040     for (unsigned l = 0; l != NumElts; l += 16) {
7041       for (unsigned i = 0; i != 16; ++i) {
7042         unsigned Idx = ShiftVal + i;
7043         if (Idx >= 16)
7044           Idx += NumElts - 16; // End of lane, switch operand.
7045         Indices[l + i] = Idx + l;
7046       }
7047     }
7048 
7049     Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7050                                                makeArrayRef(Indices, NumElts),
7051                                                "palignr");
7052 
7053     // If this isn't a masked builtin, just return the align operation.
7054     if (Ops.size() == 3)
7055       return Align;
7056 
7057     return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7058   }
7059 
7060   case X86::BI__builtin_ia32_movnti:
7061   case X86::BI__builtin_ia32_movnti64: {
7062     llvm::MDNode *Node = llvm::MDNode::get(
7063         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7064 
7065     // Convert the type of the pointer to a pointer to the stored type.
7066     Value *BC = Builder.CreateBitCast(Ops[0],
7067                                 llvm::PointerType::getUnqual(Ops[1]->getType()),
7068                                       "cast");
7069     StoreInst *SI = Builder.CreateDefaultAlignedStore(Ops[1], BC);
7070     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7071 
7072     // No alignment for scalar intrinsic store.
7073     SI->setAlignment(1);
7074     return SI;
7075   }
7076   case X86::BI__builtin_ia32_movntsd:
7077   case X86::BI__builtin_ia32_movntss: {
7078     llvm::MDNode *Node = llvm::MDNode::get(
7079         getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7080 
7081     // Extract the 0'th element of the source vector.
7082     Value *Scl = Builder.CreateExtractElement(Ops[1], (uint64_t)0, "extract");
7083 
7084     // Convert the type of the pointer to a pointer to the stored type.
7085     Value *BC = Builder.CreateBitCast(Ops[0],
7086                                 llvm::PointerType::getUnqual(Scl->getType()),
7087                                       "cast");
7088 
7089     // Unaligned nontemporal store of the scalar value.
7090     StoreInst *SI = Builder.CreateDefaultAlignedStore(Scl, BC);
7091     SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7092     SI->setAlignment(1);
7093     return SI;
7094   }
7095 
7096   case X86::BI__builtin_ia32_selectb_128:
7097   case X86::BI__builtin_ia32_selectb_256:
7098   case X86::BI__builtin_ia32_selectb_512:
7099   case X86::BI__builtin_ia32_selectw_128:
7100   case X86::BI__builtin_ia32_selectw_256:
7101   case X86::BI__builtin_ia32_selectw_512:
7102   case X86::BI__builtin_ia32_selectd_128:
7103   case X86::BI__builtin_ia32_selectd_256:
7104   case X86::BI__builtin_ia32_selectd_512:
7105   case X86::BI__builtin_ia32_selectq_128:
7106   case X86::BI__builtin_ia32_selectq_256:
7107   case X86::BI__builtin_ia32_selectq_512:
7108   case X86::BI__builtin_ia32_selectps_128:
7109   case X86::BI__builtin_ia32_selectps_256:
7110   case X86::BI__builtin_ia32_selectps_512:
7111   case X86::BI__builtin_ia32_selectpd_128:
7112   case X86::BI__builtin_ia32_selectpd_256:
7113   case X86::BI__builtin_ia32_selectpd_512:
7114     return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7115   case X86::BI__builtin_ia32_pcmpeqb128_mask:
7116   case X86::BI__builtin_ia32_pcmpeqb256_mask:
7117   case X86::BI__builtin_ia32_pcmpeqb512_mask:
7118   case X86::BI__builtin_ia32_pcmpeqw128_mask:
7119   case X86::BI__builtin_ia32_pcmpeqw256_mask:
7120   case X86::BI__builtin_ia32_pcmpeqw512_mask:
7121   case X86::BI__builtin_ia32_pcmpeqd128_mask:
7122   case X86::BI__builtin_ia32_pcmpeqd256_mask:
7123   case X86::BI__builtin_ia32_pcmpeqd512_mask:
7124   case X86::BI__builtin_ia32_pcmpeqq128_mask:
7125   case X86::BI__builtin_ia32_pcmpeqq256_mask:
7126   case X86::BI__builtin_ia32_pcmpeqq512_mask:
7127     return EmitX86MaskedCompare(*this, 0, false, Ops);
7128   case X86::BI__builtin_ia32_pcmpgtb128_mask:
7129   case X86::BI__builtin_ia32_pcmpgtb256_mask:
7130   case X86::BI__builtin_ia32_pcmpgtb512_mask:
7131   case X86::BI__builtin_ia32_pcmpgtw128_mask:
7132   case X86::BI__builtin_ia32_pcmpgtw256_mask:
7133   case X86::BI__builtin_ia32_pcmpgtw512_mask:
7134   case X86::BI__builtin_ia32_pcmpgtd128_mask:
7135   case X86::BI__builtin_ia32_pcmpgtd256_mask:
7136   case X86::BI__builtin_ia32_pcmpgtd512_mask:
7137   case X86::BI__builtin_ia32_pcmpgtq128_mask:
7138   case X86::BI__builtin_ia32_pcmpgtq256_mask:
7139   case X86::BI__builtin_ia32_pcmpgtq512_mask:
7140     return EmitX86MaskedCompare(*this, 6, true, Ops);
7141   case X86::BI__builtin_ia32_cmpb128_mask:
7142   case X86::BI__builtin_ia32_cmpb256_mask:
7143   case X86::BI__builtin_ia32_cmpb512_mask:
7144   case X86::BI__builtin_ia32_cmpw128_mask:
7145   case X86::BI__builtin_ia32_cmpw256_mask:
7146   case X86::BI__builtin_ia32_cmpw512_mask:
7147   case X86::BI__builtin_ia32_cmpd128_mask:
7148   case X86::BI__builtin_ia32_cmpd256_mask:
7149   case X86::BI__builtin_ia32_cmpd512_mask:
7150   case X86::BI__builtin_ia32_cmpq128_mask:
7151   case X86::BI__builtin_ia32_cmpq256_mask:
7152   case X86::BI__builtin_ia32_cmpq512_mask: {
7153     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7154     return EmitX86MaskedCompare(*this, CC, true, Ops);
7155   }
7156   case X86::BI__builtin_ia32_ucmpb128_mask:
7157   case X86::BI__builtin_ia32_ucmpb256_mask:
7158   case X86::BI__builtin_ia32_ucmpb512_mask:
7159   case X86::BI__builtin_ia32_ucmpw128_mask:
7160   case X86::BI__builtin_ia32_ucmpw256_mask:
7161   case X86::BI__builtin_ia32_ucmpw512_mask:
7162   case X86::BI__builtin_ia32_ucmpd128_mask:
7163   case X86::BI__builtin_ia32_ucmpd256_mask:
7164   case X86::BI__builtin_ia32_ucmpd512_mask:
7165   case X86::BI__builtin_ia32_ucmpq128_mask:
7166   case X86::BI__builtin_ia32_ucmpq256_mask:
7167   case X86::BI__builtin_ia32_ucmpq512_mask: {
7168     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7169     return EmitX86MaskedCompare(*this, CC, false, Ops);
7170   }
7171 
7172   case X86::BI__builtin_ia32_vplzcntd_128_mask:
7173   case X86::BI__builtin_ia32_vplzcntd_256_mask:
7174   case X86::BI__builtin_ia32_vplzcntd_512_mask:
7175   case X86::BI__builtin_ia32_vplzcntq_128_mask:
7176   case X86::BI__builtin_ia32_vplzcntq_256_mask:
7177   case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7178     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7179     return EmitX86Select(*this, Ops[2],
7180                          Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7181                          Ops[1]);
7182   }
7183 
7184   // TODO: Handle 64/512-bit vector widths of min/max.
7185   case X86::BI__builtin_ia32_pmaxsb128:
7186   case X86::BI__builtin_ia32_pmaxsw128:
7187   case X86::BI__builtin_ia32_pmaxsd128:
7188   case X86::BI__builtin_ia32_pmaxsb256:
7189   case X86::BI__builtin_ia32_pmaxsw256:
7190   case X86::BI__builtin_ia32_pmaxsd256: {
7191     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Ops[1]);
7192     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7193   }
7194   case X86::BI__builtin_ia32_pmaxub128:
7195   case X86::BI__builtin_ia32_pmaxuw128:
7196   case X86::BI__builtin_ia32_pmaxud128:
7197   case X86::BI__builtin_ia32_pmaxub256:
7198   case X86::BI__builtin_ia32_pmaxuw256:
7199   case X86::BI__builtin_ia32_pmaxud256: {
7200     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Ops[0], Ops[1]);
7201     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7202   }
7203   case X86::BI__builtin_ia32_pminsb128:
7204   case X86::BI__builtin_ia32_pminsw128:
7205   case X86::BI__builtin_ia32_pminsd128:
7206   case X86::BI__builtin_ia32_pminsb256:
7207   case X86::BI__builtin_ia32_pminsw256:
7208   case X86::BI__builtin_ia32_pminsd256: {
7209     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_SLT, Ops[0], Ops[1]);
7210     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7211   }
7212   case X86::BI__builtin_ia32_pminub128:
7213   case X86::BI__builtin_ia32_pminuw128:
7214   case X86::BI__builtin_ia32_pminud128:
7215   case X86::BI__builtin_ia32_pminub256:
7216   case X86::BI__builtin_ia32_pminuw256:
7217   case X86::BI__builtin_ia32_pminud256: {
7218     Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, Ops[0], Ops[1]);
7219     return Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7220   }
7221 
7222   // 3DNow!
7223   case X86::BI__builtin_ia32_pswapdsf:
7224   case X86::BI__builtin_ia32_pswapdsi: {
7225     llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7226     Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7227     llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7228     return Builder.CreateCall(F, Ops, "pswapd");
7229   }
7230   case X86::BI__builtin_ia32_rdrand16_step:
7231   case X86::BI__builtin_ia32_rdrand32_step:
7232   case X86::BI__builtin_ia32_rdrand64_step:
7233   case X86::BI__builtin_ia32_rdseed16_step:
7234   case X86::BI__builtin_ia32_rdseed32_step:
7235   case X86::BI__builtin_ia32_rdseed64_step: {
7236     Intrinsic::ID ID;
7237     switch (BuiltinID) {
7238     default: llvm_unreachable("Unsupported intrinsic!");
7239     case X86::BI__builtin_ia32_rdrand16_step:
7240       ID = Intrinsic::x86_rdrand_16;
7241       break;
7242     case X86::BI__builtin_ia32_rdrand32_step:
7243       ID = Intrinsic::x86_rdrand_32;
7244       break;
7245     case X86::BI__builtin_ia32_rdrand64_step:
7246       ID = Intrinsic::x86_rdrand_64;
7247       break;
7248     case X86::BI__builtin_ia32_rdseed16_step:
7249       ID = Intrinsic::x86_rdseed_16;
7250       break;
7251     case X86::BI__builtin_ia32_rdseed32_step:
7252       ID = Intrinsic::x86_rdseed_32;
7253       break;
7254     case X86::BI__builtin_ia32_rdseed64_step:
7255       ID = Intrinsic::x86_rdseed_64;
7256       break;
7257     }
7258 
7259     Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7260     Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7261                                       Ops[0]);
7262     return Builder.CreateExtractValue(Call, 1);
7263   }
7264 
7265   // SSE packed comparison intrinsics
7266   case X86::BI__builtin_ia32_cmpeqps:
7267   case X86::BI__builtin_ia32_cmpeqpd:
7268     return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7269   case X86::BI__builtin_ia32_cmpltps:
7270   case X86::BI__builtin_ia32_cmpltpd:
7271     return getVectorFCmpIR(CmpInst::FCMP_OLT);
7272   case X86::BI__builtin_ia32_cmpleps:
7273   case X86::BI__builtin_ia32_cmplepd:
7274     return getVectorFCmpIR(CmpInst::FCMP_OLE);
7275   case X86::BI__builtin_ia32_cmpunordps:
7276   case X86::BI__builtin_ia32_cmpunordpd:
7277     return getVectorFCmpIR(CmpInst::FCMP_UNO);
7278   case X86::BI__builtin_ia32_cmpneqps:
7279   case X86::BI__builtin_ia32_cmpneqpd:
7280     return getVectorFCmpIR(CmpInst::FCMP_UNE);
7281   case X86::BI__builtin_ia32_cmpnltps:
7282   case X86::BI__builtin_ia32_cmpnltpd:
7283     return getVectorFCmpIR(CmpInst::FCMP_UGE);
7284   case X86::BI__builtin_ia32_cmpnleps:
7285   case X86::BI__builtin_ia32_cmpnlepd:
7286     return getVectorFCmpIR(CmpInst::FCMP_UGT);
7287   case X86::BI__builtin_ia32_cmpordps:
7288   case X86::BI__builtin_ia32_cmpordpd:
7289     return getVectorFCmpIR(CmpInst::FCMP_ORD);
7290   case X86::BI__builtin_ia32_cmpps:
7291   case X86::BI__builtin_ia32_cmpps256:
7292   case X86::BI__builtin_ia32_cmppd:
7293   case X86::BI__builtin_ia32_cmppd256: {
7294     unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7295     // If this one of the SSE immediates, we can use native IR.
7296     if (CC < 8) {
7297       FCmpInst::Predicate Pred;
7298       switch (CC) {
7299       case 0: Pred = FCmpInst::FCMP_OEQ; break;
7300       case 1: Pred = FCmpInst::FCMP_OLT; break;
7301       case 2: Pred = FCmpInst::FCMP_OLE; break;
7302       case 3: Pred = FCmpInst::FCMP_UNO; break;
7303       case 4: Pred = FCmpInst::FCMP_UNE; break;
7304       case 5: Pred = FCmpInst::FCMP_UGE; break;
7305       case 6: Pred = FCmpInst::FCMP_UGT; break;
7306       case 7: Pred = FCmpInst::FCMP_ORD; break;
7307       }
7308       return getVectorFCmpIR(Pred);
7309     }
7310 
7311     // We can't handle 8-31 immediates with native IR, use the intrinsic.
7312     Intrinsic::ID ID;
7313     switch (BuiltinID) {
7314     default: llvm_unreachable("Unsupported intrinsic!");
7315     case X86::BI__builtin_ia32_cmpps:
7316       ID = Intrinsic::x86_sse_cmp_ps;
7317       break;
7318     case X86::BI__builtin_ia32_cmpps256:
7319       ID = Intrinsic::x86_avx_cmp_ps_256;
7320       break;
7321     case X86::BI__builtin_ia32_cmppd:
7322       ID = Intrinsic::x86_sse2_cmp_pd;
7323       break;
7324     case X86::BI__builtin_ia32_cmppd256:
7325       ID = Intrinsic::x86_avx_cmp_pd_256;
7326       break;
7327     }
7328 
7329     return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7330   }
7331 
7332   // SSE scalar comparison intrinsics
7333   case X86::BI__builtin_ia32_cmpeqss:
7334     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7335   case X86::BI__builtin_ia32_cmpltss:
7336     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7337   case X86::BI__builtin_ia32_cmpless:
7338     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7339   case X86::BI__builtin_ia32_cmpunordss:
7340     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7341   case X86::BI__builtin_ia32_cmpneqss:
7342     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7343   case X86::BI__builtin_ia32_cmpnltss:
7344     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7345   case X86::BI__builtin_ia32_cmpnless:
7346     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7347   case X86::BI__builtin_ia32_cmpordss:
7348     return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7349   case X86::BI__builtin_ia32_cmpeqsd:
7350     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7351   case X86::BI__builtin_ia32_cmpltsd:
7352     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7353   case X86::BI__builtin_ia32_cmplesd:
7354     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7355   case X86::BI__builtin_ia32_cmpunordsd:
7356     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7357   case X86::BI__builtin_ia32_cmpneqsd:
7358     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7359   case X86::BI__builtin_ia32_cmpnltsd:
7360     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7361   case X86::BI__builtin_ia32_cmpnlesd:
7362     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
7363   case X86::BI__builtin_ia32_cmpordsd:
7364     return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
7365   }
7366 }
7367 
7368 
EmitPPCBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7369 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
7370                                            const CallExpr *E) {
7371   SmallVector<Value*, 4> Ops;
7372 
7373   for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
7374     Ops.push_back(EmitScalarExpr(E->getArg(i)));
7375 
7376   Intrinsic::ID ID = Intrinsic::not_intrinsic;
7377 
7378   switch (BuiltinID) {
7379   default: return nullptr;
7380 
7381   // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
7382   // call __builtin_readcyclecounter.
7383   case PPC::BI__builtin_ppc_get_timebase:
7384     return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
7385 
7386   // vec_ld, vec_lvsl, vec_lvsr
7387   case PPC::BI__builtin_altivec_lvx:
7388   case PPC::BI__builtin_altivec_lvxl:
7389   case PPC::BI__builtin_altivec_lvebx:
7390   case PPC::BI__builtin_altivec_lvehx:
7391   case PPC::BI__builtin_altivec_lvewx:
7392   case PPC::BI__builtin_altivec_lvsl:
7393   case PPC::BI__builtin_altivec_lvsr:
7394   case PPC::BI__builtin_vsx_lxvd2x:
7395   case PPC::BI__builtin_vsx_lxvw4x:
7396   {
7397     Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
7398 
7399     Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
7400     Ops.pop_back();
7401 
7402     switch (BuiltinID) {
7403     default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
7404     case PPC::BI__builtin_altivec_lvx:
7405       ID = Intrinsic::ppc_altivec_lvx;
7406       break;
7407     case PPC::BI__builtin_altivec_lvxl:
7408       ID = Intrinsic::ppc_altivec_lvxl;
7409       break;
7410     case PPC::BI__builtin_altivec_lvebx:
7411       ID = Intrinsic::ppc_altivec_lvebx;
7412       break;
7413     case PPC::BI__builtin_altivec_lvehx:
7414       ID = Intrinsic::ppc_altivec_lvehx;
7415       break;
7416     case PPC::BI__builtin_altivec_lvewx:
7417       ID = Intrinsic::ppc_altivec_lvewx;
7418       break;
7419     case PPC::BI__builtin_altivec_lvsl:
7420       ID = Intrinsic::ppc_altivec_lvsl;
7421       break;
7422     case PPC::BI__builtin_altivec_lvsr:
7423       ID = Intrinsic::ppc_altivec_lvsr;
7424       break;
7425     case PPC::BI__builtin_vsx_lxvd2x:
7426       ID = Intrinsic::ppc_vsx_lxvd2x;
7427       break;
7428     case PPC::BI__builtin_vsx_lxvw4x:
7429       ID = Intrinsic::ppc_vsx_lxvw4x;
7430       break;
7431     }
7432     llvm::Function *F = CGM.getIntrinsic(ID);
7433     return Builder.CreateCall(F, Ops, "");
7434   }
7435 
7436   // vec_st
7437   case PPC::BI__builtin_altivec_stvx:
7438   case PPC::BI__builtin_altivec_stvxl:
7439   case PPC::BI__builtin_altivec_stvebx:
7440   case PPC::BI__builtin_altivec_stvehx:
7441   case PPC::BI__builtin_altivec_stvewx:
7442   case PPC::BI__builtin_vsx_stxvd2x:
7443   case PPC::BI__builtin_vsx_stxvw4x:
7444   {
7445     Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
7446     Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
7447     Ops.pop_back();
7448 
7449     switch (BuiltinID) {
7450     default: llvm_unreachable("Unsupported st intrinsic!");
7451     case PPC::BI__builtin_altivec_stvx:
7452       ID = Intrinsic::ppc_altivec_stvx;
7453       break;
7454     case PPC::BI__builtin_altivec_stvxl:
7455       ID = Intrinsic::ppc_altivec_stvxl;
7456       break;
7457     case PPC::BI__builtin_altivec_stvebx:
7458       ID = Intrinsic::ppc_altivec_stvebx;
7459       break;
7460     case PPC::BI__builtin_altivec_stvehx:
7461       ID = Intrinsic::ppc_altivec_stvehx;
7462       break;
7463     case PPC::BI__builtin_altivec_stvewx:
7464       ID = Intrinsic::ppc_altivec_stvewx;
7465       break;
7466     case PPC::BI__builtin_vsx_stxvd2x:
7467       ID = Intrinsic::ppc_vsx_stxvd2x;
7468       break;
7469     case PPC::BI__builtin_vsx_stxvw4x:
7470       ID = Intrinsic::ppc_vsx_stxvw4x;
7471       break;
7472     }
7473     llvm::Function *F = CGM.getIntrinsic(ID);
7474     return Builder.CreateCall(F, Ops, "");
7475   }
7476   // Square root
7477   case PPC::BI__builtin_vsx_xvsqrtsp:
7478   case PPC::BI__builtin_vsx_xvsqrtdp: {
7479     llvm::Type *ResultType = ConvertType(E->getType());
7480     Value *X = EmitScalarExpr(E->getArg(0));
7481     ID = Intrinsic::sqrt;
7482     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7483     return Builder.CreateCall(F, X);
7484   }
7485   // Count leading zeros
7486   case PPC::BI__builtin_altivec_vclzb:
7487   case PPC::BI__builtin_altivec_vclzh:
7488   case PPC::BI__builtin_altivec_vclzw:
7489   case PPC::BI__builtin_altivec_vclzd: {
7490     llvm::Type *ResultType = ConvertType(E->getType());
7491     Value *X = EmitScalarExpr(E->getArg(0));
7492     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7493     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7494     return Builder.CreateCall(F, {X, Undef});
7495   }
7496   // Copy sign
7497   case PPC::BI__builtin_vsx_xvcpsgnsp:
7498   case PPC::BI__builtin_vsx_xvcpsgndp: {
7499     llvm::Type *ResultType = ConvertType(E->getType());
7500     Value *X = EmitScalarExpr(E->getArg(0));
7501     Value *Y = EmitScalarExpr(E->getArg(1));
7502     ID = Intrinsic::copysign;
7503     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7504     return Builder.CreateCall(F, {X, Y});
7505   }
7506   // Rounding/truncation
7507   case PPC::BI__builtin_vsx_xvrspip:
7508   case PPC::BI__builtin_vsx_xvrdpip:
7509   case PPC::BI__builtin_vsx_xvrdpim:
7510   case PPC::BI__builtin_vsx_xvrspim:
7511   case PPC::BI__builtin_vsx_xvrdpi:
7512   case PPC::BI__builtin_vsx_xvrspi:
7513   case PPC::BI__builtin_vsx_xvrdpic:
7514   case PPC::BI__builtin_vsx_xvrspic:
7515   case PPC::BI__builtin_vsx_xvrdpiz:
7516   case PPC::BI__builtin_vsx_xvrspiz: {
7517     llvm::Type *ResultType = ConvertType(E->getType());
7518     Value *X = EmitScalarExpr(E->getArg(0));
7519     if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
7520         BuiltinID == PPC::BI__builtin_vsx_xvrspim)
7521       ID = Intrinsic::floor;
7522     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
7523              BuiltinID == PPC::BI__builtin_vsx_xvrspi)
7524       ID = Intrinsic::round;
7525     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
7526              BuiltinID == PPC::BI__builtin_vsx_xvrspic)
7527       ID = Intrinsic::nearbyint;
7528     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
7529              BuiltinID == PPC::BI__builtin_vsx_xvrspip)
7530       ID = Intrinsic::ceil;
7531     else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
7532              BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
7533       ID = Intrinsic::trunc;
7534     llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
7535     return Builder.CreateCall(F, X);
7536   }
7537 
7538   // Absolute value
7539   case PPC::BI__builtin_vsx_xvabsdp:
7540   case PPC::BI__builtin_vsx_xvabssp: {
7541     llvm::Type *ResultType = ConvertType(E->getType());
7542     Value *X = EmitScalarExpr(E->getArg(0));
7543     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7544     return Builder.CreateCall(F, X);
7545   }
7546 
7547   // FMA variations
7548   case PPC::BI__builtin_vsx_xvmaddadp:
7549   case PPC::BI__builtin_vsx_xvmaddasp:
7550   case PPC::BI__builtin_vsx_xvnmaddadp:
7551   case PPC::BI__builtin_vsx_xvnmaddasp:
7552   case PPC::BI__builtin_vsx_xvmsubadp:
7553   case PPC::BI__builtin_vsx_xvmsubasp:
7554   case PPC::BI__builtin_vsx_xvnmsubadp:
7555   case PPC::BI__builtin_vsx_xvnmsubasp: {
7556     llvm::Type *ResultType = ConvertType(E->getType());
7557     Value *X = EmitScalarExpr(E->getArg(0));
7558     Value *Y = EmitScalarExpr(E->getArg(1));
7559     Value *Z = EmitScalarExpr(E->getArg(2));
7560     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7561     llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7562     switch (BuiltinID) {
7563       case PPC::BI__builtin_vsx_xvmaddadp:
7564       case PPC::BI__builtin_vsx_xvmaddasp:
7565         return Builder.CreateCall(F, {X, Y, Z});
7566       case PPC::BI__builtin_vsx_xvnmaddadp:
7567       case PPC::BI__builtin_vsx_xvnmaddasp:
7568         return Builder.CreateFSub(Zero,
7569                                   Builder.CreateCall(F, {X, Y, Z}), "sub");
7570       case PPC::BI__builtin_vsx_xvmsubadp:
7571       case PPC::BI__builtin_vsx_xvmsubasp:
7572         return Builder.CreateCall(F,
7573                                   {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7574       case PPC::BI__builtin_vsx_xvnmsubadp:
7575       case PPC::BI__builtin_vsx_xvnmsubasp:
7576         Value *FsubRes =
7577           Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7578         return Builder.CreateFSub(Zero, FsubRes, "sub");
7579     }
7580     llvm_unreachable("Unknown FMA operation");
7581     return nullptr; // Suppress no-return warning
7582   }
7583   }
7584 }
7585 
EmitAMDGPUBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7586 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
7587                                               const CallExpr *E) {
7588   switch (BuiltinID) {
7589   case AMDGPU::BI__builtin_amdgcn_div_scale:
7590   case AMDGPU::BI__builtin_amdgcn_div_scalef: {
7591     // Translate from the intrinsics's struct return to the builtin's out
7592     // argument.
7593 
7594     Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
7595 
7596     llvm::Value *X = EmitScalarExpr(E->getArg(0));
7597     llvm::Value *Y = EmitScalarExpr(E->getArg(1));
7598     llvm::Value *Z = EmitScalarExpr(E->getArg(2));
7599 
7600     llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
7601                                            X->getType());
7602 
7603     llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
7604 
7605     llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
7606     llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
7607 
7608     llvm::Type *RealFlagType
7609       = FlagOutPtr.getPointer()->getType()->getPointerElementType();
7610 
7611     llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
7612     Builder.CreateStore(FlagExt, FlagOutPtr);
7613     return Result;
7614   }
7615   case AMDGPU::BI__builtin_amdgcn_div_fmas:
7616   case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
7617     llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
7618     llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
7619     llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
7620     llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
7621 
7622     llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
7623                                       Src0->getType());
7624     llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
7625     return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
7626   }
7627   case AMDGPU::BI__builtin_amdgcn_div_fixup:
7628   case AMDGPU::BI__builtin_amdgcn_div_fixupf:
7629     return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
7630   case AMDGPU::BI__builtin_amdgcn_trig_preop:
7631   case AMDGPU::BI__builtin_amdgcn_trig_preopf:
7632     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
7633   case AMDGPU::BI__builtin_amdgcn_rcp:
7634   case AMDGPU::BI__builtin_amdgcn_rcpf:
7635     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
7636   case AMDGPU::BI__builtin_amdgcn_rsq:
7637   case AMDGPU::BI__builtin_amdgcn_rsqf:
7638     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7639   case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
7640   case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
7641     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
7642   case AMDGPU::BI__builtin_amdgcn_sinf:
7643     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
7644   case AMDGPU::BI__builtin_amdgcn_cosf:
7645     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
7646   case AMDGPU::BI__builtin_amdgcn_log_clampf:
7647     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
7648   case AMDGPU::BI__builtin_amdgcn_ldexp:
7649   case AMDGPU::BI__builtin_amdgcn_ldexpf:
7650     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7651   case AMDGPU::BI__builtin_amdgcn_frexp_mant:
7652   case AMDGPU::BI__builtin_amdgcn_frexp_mantf: {
7653     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
7654   }
7655   case AMDGPU::BI__builtin_amdgcn_frexp_exp:
7656   case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
7657     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_exp);
7658   }
7659   case AMDGPU::BI__builtin_amdgcn_fract:
7660   case AMDGPU::BI__builtin_amdgcn_fractf:
7661     return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
7662   case AMDGPU::BI__builtin_amdgcn_class:
7663   case AMDGPU::BI__builtin_amdgcn_classf:
7664     return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
7665 
7666   case AMDGPU::BI__builtin_amdgcn_read_exec: {
7667     CallInst *CI = cast<CallInst>(
7668       EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
7669     CI->setConvergent();
7670     return CI;
7671   }
7672   // Legacy amdgpu prefix
7673   case AMDGPU::BI__builtin_amdgpu_rsq:
7674   case AMDGPU::BI__builtin_amdgpu_rsqf: {
7675     if (getTarget().getTriple().getArch() == Triple::amdgcn)
7676       return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
7677     return emitUnaryBuiltin(*this, E, Intrinsic::r600_rsq);
7678   }
7679   case AMDGPU::BI__builtin_amdgpu_ldexp:
7680   case AMDGPU::BI__builtin_amdgpu_ldexpf: {
7681     if (getTarget().getTriple().getArch() == Triple::amdgcn)
7682       return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
7683     return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
7684   }
7685 
7686   // amdgcn workitem
7687   case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
7688     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
7689   case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
7690     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
7691   case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
7692     return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
7693 
7694   // r600 workitem
7695   case AMDGPU::BI__builtin_r600_read_tidig_x:
7696     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
7697   case AMDGPU::BI__builtin_r600_read_tidig_y:
7698     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
7699   case AMDGPU::BI__builtin_r600_read_tidig_z:
7700     return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
7701   default:
7702     return nullptr;
7703   }
7704 }
7705 
7706 /// Handle a SystemZ function in which the final argument is a pointer
7707 /// to an int that receives the post-instruction CC value.  At the LLVM level
7708 /// this is represented as a function that returns a {result, cc} pair.
EmitSystemZIntrinsicWithCC(CodeGenFunction & CGF,unsigned IntrinsicID,const CallExpr * E)7709 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
7710                                          unsigned IntrinsicID,
7711                                          const CallExpr *E) {
7712   unsigned NumArgs = E->getNumArgs() - 1;
7713   SmallVector<Value *, 8> Args(NumArgs);
7714   for (unsigned I = 0; I < NumArgs; ++I)
7715     Args[I] = CGF.EmitScalarExpr(E->getArg(I));
7716   Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
7717   Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
7718   Value *Call = CGF.Builder.CreateCall(F, Args);
7719   Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
7720   CGF.Builder.CreateStore(CC, CCPtr);
7721   return CGF.Builder.CreateExtractValue(Call, 0);
7722 }
7723 
EmitSystemZBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7724 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
7725                                                const CallExpr *E) {
7726   switch (BuiltinID) {
7727   case SystemZ::BI__builtin_tbegin: {
7728     Value *TDB = EmitScalarExpr(E->getArg(0));
7729     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7730     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
7731     return Builder.CreateCall(F, {TDB, Control});
7732   }
7733   case SystemZ::BI__builtin_tbegin_nofloat: {
7734     Value *TDB = EmitScalarExpr(E->getArg(0));
7735     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
7736     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
7737     return Builder.CreateCall(F, {TDB, Control});
7738   }
7739   case SystemZ::BI__builtin_tbeginc: {
7740     Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
7741     Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
7742     Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
7743     return Builder.CreateCall(F, {TDB, Control});
7744   }
7745   case SystemZ::BI__builtin_tabort: {
7746     Value *Data = EmitScalarExpr(E->getArg(0));
7747     Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
7748     return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
7749   }
7750   case SystemZ::BI__builtin_non_tx_store: {
7751     Value *Address = EmitScalarExpr(E->getArg(0));
7752     Value *Data = EmitScalarExpr(E->getArg(1));
7753     Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
7754     return Builder.CreateCall(F, {Data, Address});
7755   }
7756 
7757   // Vector builtins.  Note that most vector builtins are mapped automatically
7758   // to target-specific LLVM intrinsics.  The ones handled specially here can
7759   // be represented via standard LLVM IR, which is preferable to enable common
7760   // LLVM optimizations.
7761 
7762   case SystemZ::BI__builtin_s390_vpopctb:
7763   case SystemZ::BI__builtin_s390_vpopcth:
7764   case SystemZ::BI__builtin_s390_vpopctf:
7765   case SystemZ::BI__builtin_s390_vpopctg: {
7766     llvm::Type *ResultType = ConvertType(E->getType());
7767     Value *X = EmitScalarExpr(E->getArg(0));
7768     Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7769     return Builder.CreateCall(F, X);
7770   }
7771 
7772   case SystemZ::BI__builtin_s390_vclzb:
7773   case SystemZ::BI__builtin_s390_vclzh:
7774   case SystemZ::BI__builtin_s390_vclzf:
7775   case SystemZ::BI__builtin_s390_vclzg: {
7776     llvm::Type *ResultType = ConvertType(E->getType());
7777     Value *X = EmitScalarExpr(E->getArg(0));
7778     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7779     Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
7780     return Builder.CreateCall(F, {X, Undef});
7781   }
7782 
7783   case SystemZ::BI__builtin_s390_vctzb:
7784   case SystemZ::BI__builtin_s390_vctzh:
7785   case SystemZ::BI__builtin_s390_vctzf:
7786   case SystemZ::BI__builtin_s390_vctzg: {
7787     llvm::Type *ResultType = ConvertType(E->getType());
7788     Value *X = EmitScalarExpr(E->getArg(0));
7789     Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
7790     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
7791     return Builder.CreateCall(F, {X, Undef});
7792   }
7793 
7794   case SystemZ::BI__builtin_s390_vfsqdb: {
7795     llvm::Type *ResultType = ConvertType(E->getType());
7796     Value *X = EmitScalarExpr(E->getArg(0));
7797     Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
7798     return Builder.CreateCall(F, X);
7799   }
7800   case SystemZ::BI__builtin_s390_vfmadb: {
7801     llvm::Type *ResultType = ConvertType(E->getType());
7802     Value *X = EmitScalarExpr(E->getArg(0));
7803     Value *Y = EmitScalarExpr(E->getArg(1));
7804     Value *Z = EmitScalarExpr(E->getArg(2));
7805     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7806     return Builder.CreateCall(F, {X, Y, Z});
7807   }
7808   case SystemZ::BI__builtin_s390_vfmsdb: {
7809     llvm::Type *ResultType = ConvertType(E->getType());
7810     Value *X = EmitScalarExpr(E->getArg(0));
7811     Value *Y = EmitScalarExpr(E->getArg(1));
7812     Value *Z = EmitScalarExpr(E->getArg(2));
7813     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7814     Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
7815     return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
7816   }
7817   case SystemZ::BI__builtin_s390_vflpdb: {
7818     llvm::Type *ResultType = ConvertType(E->getType());
7819     Value *X = EmitScalarExpr(E->getArg(0));
7820     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7821     return Builder.CreateCall(F, X);
7822   }
7823   case SystemZ::BI__builtin_s390_vflndb: {
7824     llvm::Type *ResultType = ConvertType(E->getType());
7825     Value *X = EmitScalarExpr(E->getArg(0));
7826     Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
7827     Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
7828     return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
7829   }
7830   case SystemZ::BI__builtin_s390_vfidb: {
7831     llvm::Type *ResultType = ConvertType(E->getType());
7832     Value *X = EmitScalarExpr(E->getArg(0));
7833     // Constant-fold the M4 and M5 mask arguments.
7834     llvm::APSInt M4, M5;
7835     bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
7836     bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
7837     assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
7838     (void)IsConstM4; (void)IsConstM5;
7839     // Check whether this instance of vfidb can be represented via a LLVM
7840     // standard intrinsic.  We only support some combinations of M4 and M5.
7841     Intrinsic::ID ID = Intrinsic::not_intrinsic;
7842     switch (M4.getZExtValue()) {
7843     default: break;
7844     case 0:  // IEEE-inexact exception allowed
7845       switch (M5.getZExtValue()) {
7846       default: break;
7847       case 0: ID = Intrinsic::rint; break;
7848       }
7849       break;
7850     case 4:  // IEEE-inexact exception suppressed
7851       switch (M5.getZExtValue()) {
7852       default: break;
7853       case 0: ID = Intrinsic::nearbyint; break;
7854       case 1: ID = Intrinsic::round; break;
7855       case 5: ID = Intrinsic::trunc; break;
7856       case 6: ID = Intrinsic::ceil; break;
7857       case 7: ID = Intrinsic::floor; break;
7858       }
7859       break;
7860     }
7861     if (ID != Intrinsic::not_intrinsic) {
7862       Function *F = CGM.getIntrinsic(ID, ResultType);
7863       return Builder.CreateCall(F, X);
7864     }
7865     Function *F = CGM.getIntrinsic(Intrinsic::s390_vfidb);
7866     Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
7867     Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
7868     return Builder.CreateCall(F, {X, M4Value, M5Value});
7869   }
7870 
7871   // Vector intrisincs that output the post-instruction CC value.
7872 
7873 #define INTRINSIC_WITH_CC(NAME) \
7874     case SystemZ::BI__builtin_##NAME: \
7875       return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
7876 
7877   INTRINSIC_WITH_CC(s390_vpkshs);
7878   INTRINSIC_WITH_CC(s390_vpksfs);
7879   INTRINSIC_WITH_CC(s390_vpksgs);
7880 
7881   INTRINSIC_WITH_CC(s390_vpklshs);
7882   INTRINSIC_WITH_CC(s390_vpklsfs);
7883   INTRINSIC_WITH_CC(s390_vpklsgs);
7884 
7885   INTRINSIC_WITH_CC(s390_vceqbs);
7886   INTRINSIC_WITH_CC(s390_vceqhs);
7887   INTRINSIC_WITH_CC(s390_vceqfs);
7888   INTRINSIC_WITH_CC(s390_vceqgs);
7889 
7890   INTRINSIC_WITH_CC(s390_vchbs);
7891   INTRINSIC_WITH_CC(s390_vchhs);
7892   INTRINSIC_WITH_CC(s390_vchfs);
7893   INTRINSIC_WITH_CC(s390_vchgs);
7894 
7895   INTRINSIC_WITH_CC(s390_vchlbs);
7896   INTRINSIC_WITH_CC(s390_vchlhs);
7897   INTRINSIC_WITH_CC(s390_vchlfs);
7898   INTRINSIC_WITH_CC(s390_vchlgs);
7899 
7900   INTRINSIC_WITH_CC(s390_vfaebs);
7901   INTRINSIC_WITH_CC(s390_vfaehs);
7902   INTRINSIC_WITH_CC(s390_vfaefs);
7903 
7904   INTRINSIC_WITH_CC(s390_vfaezbs);
7905   INTRINSIC_WITH_CC(s390_vfaezhs);
7906   INTRINSIC_WITH_CC(s390_vfaezfs);
7907 
7908   INTRINSIC_WITH_CC(s390_vfeebs);
7909   INTRINSIC_WITH_CC(s390_vfeehs);
7910   INTRINSIC_WITH_CC(s390_vfeefs);
7911 
7912   INTRINSIC_WITH_CC(s390_vfeezbs);
7913   INTRINSIC_WITH_CC(s390_vfeezhs);
7914   INTRINSIC_WITH_CC(s390_vfeezfs);
7915 
7916   INTRINSIC_WITH_CC(s390_vfenebs);
7917   INTRINSIC_WITH_CC(s390_vfenehs);
7918   INTRINSIC_WITH_CC(s390_vfenefs);
7919 
7920   INTRINSIC_WITH_CC(s390_vfenezbs);
7921   INTRINSIC_WITH_CC(s390_vfenezhs);
7922   INTRINSIC_WITH_CC(s390_vfenezfs);
7923 
7924   INTRINSIC_WITH_CC(s390_vistrbs);
7925   INTRINSIC_WITH_CC(s390_vistrhs);
7926   INTRINSIC_WITH_CC(s390_vistrfs);
7927 
7928   INTRINSIC_WITH_CC(s390_vstrcbs);
7929   INTRINSIC_WITH_CC(s390_vstrchs);
7930   INTRINSIC_WITH_CC(s390_vstrcfs);
7931 
7932   INTRINSIC_WITH_CC(s390_vstrczbs);
7933   INTRINSIC_WITH_CC(s390_vstrczhs);
7934   INTRINSIC_WITH_CC(s390_vstrczfs);
7935 
7936   INTRINSIC_WITH_CC(s390_vfcedbs);
7937   INTRINSIC_WITH_CC(s390_vfchdbs);
7938   INTRINSIC_WITH_CC(s390_vfchedbs);
7939 
7940   INTRINSIC_WITH_CC(s390_vftcidb);
7941 
7942 #undef INTRINSIC_WITH_CC
7943 
7944   default:
7945     return nullptr;
7946   }
7947 }
7948 
EmitNVPTXBuiltinExpr(unsigned BuiltinID,const CallExpr * E)7949 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
7950                                              const CallExpr *E) {
7951   auto MakeLdg = [&](unsigned IntrinsicID) {
7952     Value *Ptr = EmitScalarExpr(E->getArg(0));
7953     AlignmentSource AlignSource;
7954     clang::CharUnits Align =
7955         getNaturalPointeeTypeAlignment(E->getArg(0)->getType(), &AlignSource);
7956     return Builder.CreateCall(
7957         CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
7958                                        Ptr->getType()}),
7959         {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
7960   };
7961 
7962   switch (BuiltinID) {
7963   case NVPTX::BI__nvvm_atom_add_gen_i:
7964   case NVPTX::BI__nvvm_atom_add_gen_l:
7965   case NVPTX::BI__nvvm_atom_add_gen_ll:
7966     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
7967 
7968   case NVPTX::BI__nvvm_atom_sub_gen_i:
7969   case NVPTX::BI__nvvm_atom_sub_gen_l:
7970   case NVPTX::BI__nvvm_atom_sub_gen_ll:
7971     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
7972 
7973   case NVPTX::BI__nvvm_atom_and_gen_i:
7974   case NVPTX::BI__nvvm_atom_and_gen_l:
7975   case NVPTX::BI__nvvm_atom_and_gen_ll:
7976     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
7977 
7978   case NVPTX::BI__nvvm_atom_or_gen_i:
7979   case NVPTX::BI__nvvm_atom_or_gen_l:
7980   case NVPTX::BI__nvvm_atom_or_gen_ll:
7981     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
7982 
7983   case NVPTX::BI__nvvm_atom_xor_gen_i:
7984   case NVPTX::BI__nvvm_atom_xor_gen_l:
7985   case NVPTX::BI__nvvm_atom_xor_gen_ll:
7986     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
7987 
7988   case NVPTX::BI__nvvm_atom_xchg_gen_i:
7989   case NVPTX::BI__nvvm_atom_xchg_gen_l:
7990   case NVPTX::BI__nvvm_atom_xchg_gen_ll:
7991     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
7992 
7993   case NVPTX::BI__nvvm_atom_max_gen_i:
7994   case NVPTX::BI__nvvm_atom_max_gen_l:
7995   case NVPTX::BI__nvvm_atom_max_gen_ll:
7996     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
7997 
7998   case NVPTX::BI__nvvm_atom_max_gen_ui:
7999   case NVPTX::BI__nvvm_atom_max_gen_ul:
8000   case NVPTX::BI__nvvm_atom_max_gen_ull:
8001     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
8002 
8003   case NVPTX::BI__nvvm_atom_min_gen_i:
8004   case NVPTX::BI__nvvm_atom_min_gen_l:
8005   case NVPTX::BI__nvvm_atom_min_gen_ll:
8006     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
8007 
8008   case NVPTX::BI__nvvm_atom_min_gen_ui:
8009   case NVPTX::BI__nvvm_atom_min_gen_ul:
8010   case NVPTX::BI__nvvm_atom_min_gen_ull:
8011     return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
8012 
8013   case NVPTX::BI__nvvm_atom_cas_gen_i:
8014   case NVPTX::BI__nvvm_atom_cas_gen_l:
8015   case NVPTX::BI__nvvm_atom_cas_gen_ll:
8016     // __nvvm_atom_cas_gen_* should return the old value rather than the
8017     // success flag.
8018     return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
8019 
8020   case NVPTX::BI__nvvm_atom_add_gen_f: {
8021     Value *Ptr = EmitScalarExpr(E->getArg(0));
8022     Value *Val = EmitScalarExpr(E->getArg(1));
8023     // atomicrmw only deals with integer arguments so we need to use
8024     // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
8025     Value *FnALAF32 =
8026         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
8027     return Builder.CreateCall(FnALAF32, {Ptr, Val});
8028   }
8029 
8030   case NVPTX::BI__nvvm_atom_inc_gen_ui: {
8031     Value *Ptr = EmitScalarExpr(E->getArg(0));
8032     Value *Val = EmitScalarExpr(E->getArg(1));
8033     Value *FnALI32 =
8034         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
8035     return Builder.CreateCall(FnALI32, {Ptr, Val});
8036   }
8037 
8038   case NVPTX::BI__nvvm_atom_dec_gen_ui: {
8039     Value *Ptr = EmitScalarExpr(E->getArg(0));
8040     Value *Val = EmitScalarExpr(E->getArg(1));
8041     Value *FnALD32 =
8042         CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
8043     return Builder.CreateCall(FnALD32, {Ptr, Val});
8044   }
8045 
8046   case NVPTX::BI__nvvm_ldg_c:
8047   case NVPTX::BI__nvvm_ldg_c2:
8048   case NVPTX::BI__nvvm_ldg_c4:
8049   case NVPTX::BI__nvvm_ldg_s:
8050   case NVPTX::BI__nvvm_ldg_s2:
8051   case NVPTX::BI__nvvm_ldg_s4:
8052   case NVPTX::BI__nvvm_ldg_i:
8053   case NVPTX::BI__nvvm_ldg_i2:
8054   case NVPTX::BI__nvvm_ldg_i4:
8055   case NVPTX::BI__nvvm_ldg_l:
8056   case NVPTX::BI__nvvm_ldg_ll:
8057   case NVPTX::BI__nvvm_ldg_ll2:
8058   case NVPTX::BI__nvvm_ldg_uc:
8059   case NVPTX::BI__nvvm_ldg_uc2:
8060   case NVPTX::BI__nvvm_ldg_uc4:
8061   case NVPTX::BI__nvvm_ldg_us:
8062   case NVPTX::BI__nvvm_ldg_us2:
8063   case NVPTX::BI__nvvm_ldg_us4:
8064   case NVPTX::BI__nvvm_ldg_ui:
8065   case NVPTX::BI__nvvm_ldg_ui2:
8066   case NVPTX::BI__nvvm_ldg_ui4:
8067   case NVPTX::BI__nvvm_ldg_ul:
8068   case NVPTX::BI__nvvm_ldg_ull:
8069   case NVPTX::BI__nvvm_ldg_ull2:
8070     // PTX Interoperability section 2.2: "For a vector with an even number of
8071     // elements, its alignment is set to number of elements times the alignment
8072     // of its member: n*alignof(t)."
8073     return MakeLdg(Intrinsic::nvvm_ldg_global_i);
8074   case NVPTX::BI__nvvm_ldg_f:
8075   case NVPTX::BI__nvvm_ldg_f2:
8076   case NVPTX::BI__nvvm_ldg_f4:
8077   case NVPTX::BI__nvvm_ldg_d:
8078   case NVPTX::BI__nvvm_ldg_d2:
8079     return MakeLdg(Intrinsic::nvvm_ldg_global_f);
8080   default:
8081     return nullptr;
8082   }
8083 }
8084 
EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,const CallExpr * E)8085 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
8086                                                    const CallExpr *E) {
8087   switch (BuiltinID) {
8088   case WebAssembly::BI__builtin_wasm_current_memory: {
8089     llvm::Type *ResultType = ConvertType(E->getType());
8090     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
8091     return Builder.CreateCall(Callee);
8092   }
8093   case WebAssembly::BI__builtin_wasm_grow_memory: {
8094     Value *X = EmitScalarExpr(E->getArg(0));
8095     Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
8096     return Builder.CreateCall(Callee, X);
8097   }
8098 
8099   default:
8100     return nullptr;
8101   }
8102 }
8103