1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This tablegen backend is responsible for emitting arm_neon.h, which includes
11 // a declaration and definition of each function specified by the ARM NEON
12 // compiler interface. See ARM document DUI0348B.
13 //
14 // Each NEON instruction is implemented in terms of 1 or more functions which
15 // are suffixed with the element type of the input vectors. Functions may be
16 // implemented in terms of generic vector operations such as +, *, -, etc. or
17 // by calling a __builtin_-prefixed function which will be handled by clang's
18 // CodeGen library.
19 //
20 // Additional validation code can be generated by this file when runHeader() is
21 // called, rather than the normal run() entry point.
22 //
23 // See also the documentation in include/clang/Basic/arm_neon.td.
24 //
25 //===----------------------------------------------------------------------===//
26
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
42 using namespace llvm;
43
44 namespace {
45
46 // While globals are generally bad, this one allows us to perform assertions
47 // liberally and somehow still trace them back to the def they indirectly
48 // came from.
49 static Record *CurrentRecord = nullptr;
assert_with_loc(bool Assertion,const std::string & Str)50 static void assert_with_loc(bool Assertion, const std::string &Str) {
51 if (!Assertion) {
52 if (CurrentRecord)
53 PrintFatalError(CurrentRecord->getLoc(), Str);
54 else
55 PrintFatalError(Str);
56 }
57 }
58
/// The kind of name suffix / type handling an instruction definition uses.
enum ClassKind {
  ClassNone,
  // Generic integer instruction, e.g. an "i8" suffix.
  ClassI,
  // Signed/unsigned/poly instruction, e.g. an "s8", "u8" or "p8" suffix.
  ClassS,
  // Width-specific instruction, e.g. an "8" suffix.
  ClassW,
  // Arguments are bitcast and an enum argument specifies the type.
  ClassB,
  // Logical instructions which are op instructions, but for which no suffix
  // must be emitted in our tests.
  ClassL,
  // Instructions which we do not test since they are not TRUE instructions.
  ClassNoTest
};
71
/// NeonTypeFlags - Flags identifying the types of overloaded Neon builtins.
/// These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {
/// Bit layout of a flags word: the low four bits hold an EltType and the two
/// bits above them mark unsigned and quad variants.
enum {
  EltTypeMask = 0xf,
  UnsignedFlag = 0x10,
  QuadFlag = 0x20
};

/// Element type codes. The numeric values follow declaration order, so the
/// order of the enumerators must not change.
enum EltType {
  Int8,
  Int16,
  Int32,
  Int64,
  Poly8,
  Poly16,
  Poly64,
  Poly128,
  Float16,
  Float32,
  Float64
};
} // end namespace NeonTypeFlags
92
93 class Intrinsic;
94 class NeonEmitter;
95 class Type;
96 class Variable;
97
98 //===----------------------------------------------------------------------===//
99 // TypeSpec
100 //===----------------------------------------------------------------------===//
101
102 /// A TypeSpec is just a simple wrapper around a string, but gets its own type
103 /// for strong typing purposes.
104 ///
105 /// A TypeSpec can be used to create a type.
106 class TypeSpec : public std::string {
107 public:
fromTypeSpecs(StringRef Str)108 static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
109 std::vector<TypeSpec> Ret;
110 TypeSpec Acc;
111 for (char I : Str.str()) {
112 if (islower(I)) {
113 Acc.push_back(I);
114 Ret.push_back(TypeSpec(Acc));
115 Acc.clear();
116 } else {
117 Acc.push_back(I);
118 }
119 }
120 return Ret;
121 }
122 };
123
124 //===----------------------------------------------------------------------===//
125 // Type
126 //===----------------------------------------------------------------------===//
127
128 /// A Type. Not much more to say here.
129 class Type {
130 private:
131 TypeSpec TS;
132
133 bool Float, Signed, Void, Poly, Constant, Pointer;
134 // ScalarForMangling and NoManglingQ are really not suited to live here as
135 // they are not related to the type. But they live in the TypeSpec (not the
136 // prototype), so this is really the only place to store them.
137 bool ScalarForMangling, NoManglingQ;
138 unsigned Bitwidth, ElementBitwidth, NumVectors;
139
140 public:
Type()141 Type()
142 : Float(false), Signed(false), Void(true), Poly(false), Constant(false),
143 Pointer(false), ScalarForMangling(false), NoManglingQ(false),
144 Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
145
Type(TypeSpec TS,char CharMod)146 Type(TypeSpec TS, char CharMod)
147 : TS(TS), Float(false), Signed(false), Void(false), Poly(false),
148 Constant(false), Pointer(false), ScalarForMangling(false),
149 NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
150 applyModifier(CharMod);
151 }
152
153 /// Returns a type representing "void".
getVoid()154 static Type getVoid() { return Type(); }
155
operator ==(const Type & Other) const156 bool operator==(const Type &Other) const { return str() == Other.str(); }
operator !=(const Type & Other) const157 bool operator!=(const Type &Other) const { return !operator==(Other); }
158
159 //
160 // Query functions
161 //
isScalarForMangling() const162 bool isScalarForMangling() const { return ScalarForMangling; }
noManglingQ() const163 bool noManglingQ() const { return NoManglingQ; }
164
isPointer() const165 bool isPointer() const { return Pointer; }
isFloating() const166 bool isFloating() const { return Float; }
isInteger() const167 bool isInteger() const { return !Float && !Poly; }
isSigned() const168 bool isSigned() const { return Signed; }
isScalar() const169 bool isScalar() const { return NumVectors == 0; }
isVector() const170 bool isVector() const { return NumVectors > 0; }
isFloat() const171 bool isFloat() const { return Float && ElementBitwidth == 32; }
isDouble() const172 bool isDouble() const { return Float && ElementBitwidth == 64; }
isHalf() const173 bool isHalf() const { return Float && ElementBitwidth == 16; }
isPoly() const174 bool isPoly() const { return Poly; }
isChar() const175 bool isChar() const { return ElementBitwidth == 8; }
isShort() const176 bool isShort() const { return !Float && ElementBitwidth == 16; }
isInt() const177 bool isInt() const { return !Float && ElementBitwidth == 32; }
isLong() const178 bool isLong() const { return !Float && ElementBitwidth == 64; }
isVoid() const179 bool isVoid() const { return Void; }
getNumElements() const180 unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
getSizeInBits() const181 unsigned getSizeInBits() const { return Bitwidth; }
getElementSizeInBits() const182 unsigned getElementSizeInBits() const { return ElementBitwidth; }
getNumVectors() const183 unsigned getNumVectors() const { return NumVectors; }
184
185 //
186 // Mutator functions
187 //
makeUnsigned()188 void makeUnsigned() { Signed = false; }
makeSigned()189 void makeSigned() { Signed = true; }
makeInteger(unsigned ElemWidth,bool Sign)190 void makeInteger(unsigned ElemWidth, bool Sign) {
191 Float = false;
192 Poly = false;
193 Signed = Sign;
194 ElementBitwidth = ElemWidth;
195 }
makeScalar()196 void makeScalar() {
197 Bitwidth = ElementBitwidth;
198 NumVectors = 0;
199 }
makeOneVector()200 void makeOneVector() {
201 assert(isVector());
202 NumVectors = 1;
203 }
doubleLanes()204 void doubleLanes() {
205 assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
206 Bitwidth = 128;
207 }
halveLanes()208 void halveLanes() {
209 assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
210 Bitwidth = 64;
211 }
212
213 /// Return the C string representation of a type, which is the typename
214 /// defined in stdint.h or arm_neon.h.
215 std::string str() const;
216
217 /// Return the string representation of a type, which is an encoded
218 /// string for passing to the BUILTIN() macro in Builtins.def.
219 std::string builtin_str() const;
220
221 /// Return the value in NeonTypeFlags for this type.
222 unsigned getNeonEnum() const;
223
224 /// Parse a type from a stdint.h or arm_neon.h typedef name,
225 /// for example uint32x2_t or int64_t.
226 static Type fromTypedefName(StringRef Name);
227
228 private:
229 /// Creates the type based on the typespec string in TS.
230 /// Sets "Quad" to true if the "Q" or "H" modifiers were
231 /// seen. This is needed by applyModifier as some modifiers
232 /// only take effect if the type size was changed by "Q" or "H".
233 void applyTypespec(bool &Quad);
234 /// Applies a prototype modifier to the type.
235 void applyModifier(char Mod);
236 };
237
238 //===----------------------------------------------------------------------===//
239 // Variable
240 //===----------------------------------------------------------------------===//
241
242 /// A variable is a simple class that just has a type and a name.
243 class Variable {
244 Type T;
245 std::string N;
246
247 public:
Variable()248 Variable() : T(Type::getVoid()), N("") {}
Variable(Type T,std::string N)249 Variable(Type T, std::string N) : T(T), N(N) {}
250
getType() const251 Type getType() const { return T; }
getName() const252 std::string getName() const { return "__" + N; }
253 };
254
255 //===----------------------------------------------------------------------===//
256 // Intrinsic
257 //===----------------------------------------------------------------------===//
258
259 /// The main grunt class. This represents an instantiation of an intrinsic with
260 /// a particular typespec and prototype.
261 class Intrinsic {
262 friend class DagEmitter;
263
264 /// The Record this intrinsic was created from.
265 Record *R;
266 /// The unmangled name and prototype.
267 std::string Name, Proto;
268 /// The input and output typespecs. InTS == OutTS except when
269 /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
270 TypeSpec OutTS, InTS;
271 /// The base class kind. Most intrinsics use ClassS, which has full type
272 /// info for integers (s32/u32). Some use ClassI, which doesn't care about
273 /// signedness (i32), while some (ClassB) have no type at all, only a width
274 /// (32).
275 ClassKind CK;
276 /// The list of DAGs for the body. May be empty, in which case we should
277 /// emit a builtin call.
278 ListInit *Body;
279 /// The architectural #ifdef guard.
280 std::string Guard;
281 /// Set if the Unvailable bit is 1. This means we don't generate a body,
282 /// just an "unavailable" attribute on a declaration.
283 bool IsUnavailable;
284 /// Is this intrinsic safe for big-endian? or does it need its arguments
285 /// reversing?
286 bool BigEndianSafe;
287
288 /// The types of return value [0] and parameters [1..].
289 std::vector<Type> Types;
290 /// The local variables defined.
291 std::map<std::string, Variable> Variables;
292 /// NeededEarly - set if any other intrinsic depends on this intrinsic.
293 bool NeededEarly;
294 /// UseMacro - set if we should implement using a macro or unset for a
295 /// function.
296 bool UseMacro;
297 /// The set of intrinsics that this intrinsic uses/requires.
298 std::set<Intrinsic *> Dependencies;
299 /// The "base type", which is Type('d', OutTS). InBaseType is only
300 /// different if CartesianProductOfTypes = 1 (for vreinterpret).
301 Type BaseType, InBaseType;
302 /// The return variable.
303 Variable RetVar;
304 /// A postfix to apply to every variable. Defaults to "".
305 std::string VariablePostfix;
306
307 NeonEmitter &Emitter;
308 std::stringstream OS;
309
310 public:
Intrinsic(Record * R,StringRef Name,StringRef Proto,TypeSpec OutTS,TypeSpec InTS,ClassKind CK,ListInit * Body,NeonEmitter & Emitter,StringRef Guard,bool IsUnavailable,bool BigEndianSafe)311 Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
312 TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
313 StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
314 : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
315 CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
316 BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
317 BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
318 // If this builtin takes an immediate argument, we need to #define it rather
319 // than use a standard declaration, so that SemaChecking can range check
320 // the immediate passed by the user.
321 if (Proto.find('i') != std::string::npos)
322 UseMacro = true;
323
324 // Pointer arguments need to use macros to avoid hiding aligned attributes
325 // from the pointer type.
326 if (Proto.find('p') != std::string::npos ||
327 Proto.find('c') != std::string::npos)
328 UseMacro = true;
329
330 // It is not permitted to pass or return an __fp16 by value, so intrinsics
331 // taking a scalar float16_t must be implemented as macros.
332 if (OutTS.find('h') != std::string::npos &&
333 Proto.find('s') != std::string::npos)
334 UseMacro = true;
335
336 // Modify the TypeSpec per-argument to get a concrete Type, and create
337 // known variables for each.
338 // Types[0] is the return value.
339 Types.push_back(Type(OutTS, Proto[0]));
340 for (unsigned I = 1; I < Proto.size(); ++I)
341 Types.push_back(Type(InTS, Proto[I]));
342 }
343
344 /// Get the Record that this intrinsic is based off.
getRecord() const345 Record *getRecord() const { return R; }
346 /// Get the set of Intrinsics that this intrinsic calls.
347 /// this is the set of immediate dependencies, NOT the
348 /// transitive closure.
getDependencies() const349 const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
350 /// Get the architectural guard string (#ifdef).
getGuard() const351 std::string getGuard() const { return Guard; }
352 /// Get the non-mangled name.
getName() const353 std::string getName() const { return Name; }
354
355 /// Return true if the intrinsic takes an immediate operand.
hasImmediate() const356 bool hasImmediate() const {
357 return Proto.find('i') != std::string::npos;
358 }
359 /// Return the parameter index of the immediate operand.
getImmediateIdx() const360 unsigned getImmediateIdx() const {
361 assert(hasImmediate());
362 unsigned Idx = Proto.find('i');
363 assert(Idx > 0 && "Can't return an immediate!");
364 return Idx - 1;
365 }
366
367 /// Return true if the intrinsic takes an splat operand.
hasSplat() const368 bool hasSplat() const { return Proto.find('a') != std::string::npos; }
369 /// Return the parameter index of the splat operand.
getSplatIdx() const370 unsigned getSplatIdx() const {
371 assert(hasSplat());
372 unsigned Idx = Proto.find('a');
373 assert(Idx > 0 && "Can't return a splat!");
374 return Idx - 1;
375 }
376
getNumParams() const377 unsigned getNumParams() const { return Proto.size() - 1; }
getReturnType() const378 Type getReturnType() const { return Types[0]; }
getParamType(unsigned I) const379 Type getParamType(unsigned I) const { return Types[I + 1]; }
getBaseType() const380 Type getBaseType() const { return BaseType; }
381 /// Return the raw prototype string.
getProto() const382 std::string getProto() const { return Proto; }
383
384 /// Return true if the prototype has a scalar argument.
385 /// This does not return true for the "splat" code ('a').
386 bool protoHasScalar();
387
388 /// Return the index that parameter PIndex will sit at
389 /// in a generated function call. This is often just PIndex,
390 /// but may not be as things such as multiple-vector operands
391 /// and sret parameters need to be taken into accont.
getGeneratedParamIdx(unsigned PIndex)392 unsigned getGeneratedParamIdx(unsigned PIndex) {
393 unsigned Idx = 0;
394 if (getReturnType().getNumVectors() > 1)
395 // Multiple vectors are passed as sret.
396 ++Idx;
397
398 for (unsigned I = 0; I < PIndex; ++I)
399 Idx += std::max(1U, getParamType(I).getNumVectors());
400
401 return Idx;
402 }
403
hasBody() const404 bool hasBody() const { return Body && Body->getValues().size() > 0; }
405
setNeededEarly()406 void setNeededEarly() { NeededEarly = true; }
407
operator <(const Intrinsic & Other) const408 bool operator<(const Intrinsic &Other) const {
409 // Sort lexicographically on a two-tuple (Guard, Name)
410 if (Guard != Other.Guard)
411 return Guard < Other.Guard;
412 return Name < Other.Name;
413 }
414
getClassKind(bool UseClassBIfScalar=false)415 ClassKind getClassKind(bool UseClassBIfScalar = false) {
416 if (UseClassBIfScalar && !protoHasScalar())
417 return ClassB;
418 return CK;
419 }
420
421 /// Return the name, mangled with type information.
422 /// If ForceClassS is true, use ClassS (u32/s32) instead
423 /// of the intrinsic's own type class.
424 std::string getMangledName(bool ForceClassS = false);
425 /// Return the type code for a builtin function call.
426 std::string getInstTypeCode(Type T, ClassKind CK);
427 /// Return the type string for a BUILTIN() macro in Builtins.def.
428 std::string getBuiltinTypeStr();
429
430 /// Generate the intrinsic, returning code.
431 std::string generate();
432 /// Perform type checking and populate the dependency graph, but
433 /// don't generate code yet.
434 void indexBody();
435
436 private:
437 std::string mangleName(std::string Name, ClassKind CK);
438
439 void initVariables();
440 std::string replaceParamsIn(std::string S);
441
442 void emitBodyAsBuiltinCall();
443
444 void generateImpl(bool ReverseArguments,
445 StringRef NamePrefix, StringRef CallPrefix);
446 void emitReturn();
447 void emitBody(StringRef CallPrefix);
448 void emitShadowedArgs();
449 void emitArgumentReversal();
450 void emitReturnReversal();
451 void emitReverseVariable(Variable &Dest, Variable &Src);
452 void emitNewLine();
453 void emitClosingBrace();
454 void emitOpeningBrace();
455 void emitPrototype(StringRef NamePrefix);
456
457 class DagEmitter {
458 Intrinsic &Intr;
459 StringRef CallPrefix;
460
461 public:
DagEmitter(Intrinsic & Intr,StringRef CallPrefix)462 DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
463 Intr(Intr), CallPrefix(CallPrefix) {
464 }
465 std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
466 std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
467 std::pair<Type, std::string> emitDagSplat(DagInit *DI);
468 std::pair<Type, std::string> emitDagDup(DagInit *DI);
469 std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
470 std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
471 std::pair<Type, std::string> emitDagCall(DagInit *DI);
472 std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
473 std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
474 std::pair<Type, std::string> emitDagOp(DagInit *DI);
475 std::pair<Type, std::string> emitDag(DagInit *DI);
476 };
477
478 };
479
480 //===----------------------------------------------------------------------===//
481 // NeonEmitter
482 //===----------------------------------------------------------------------===//
483
484 class NeonEmitter {
485 RecordKeeper &Records;
486 DenseMap<Record *, ClassKind> ClassMap;
487 std::map<std::string, std::vector<Intrinsic *>> IntrinsicMap;
488 unsigned UniqueNumber;
489
490 void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
491 void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
492 void genOverloadTypeCheckCode(raw_ostream &OS,
493 SmallVectorImpl<Intrinsic *> &Defs);
494 void genIntrinsicRangeCheckCode(raw_ostream &OS,
495 SmallVectorImpl<Intrinsic *> &Defs);
496
497 public:
498 /// Called by Intrinsic - this attempts to get an intrinsic that takes
499 /// the given types as arguments.
500 Intrinsic *getIntrinsic(StringRef Name, ArrayRef<Type> Types);
501
502 /// Called by Intrinsic - returns a globally-unique number.
getUniqueNumber()503 unsigned getUniqueNumber() { return UniqueNumber++; }
504
NeonEmitter(RecordKeeper & R)505 NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
506 Record *SI = R.getClass("SInst");
507 Record *II = R.getClass("IInst");
508 Record *WI = R.getClass("WInst");
509 Record *SOpI = R.getClass("SOpInst");
510 Record *IOpI = R.getClass("IOpInst");
511 Record *WOpI = R.getClass("WOpInst");
512 Record *LOpI = R.getClass("LOpInst");
513 Record *NoTestOpI = R.getClass("NoTestOpInst");
514
515 ClassMap[SI] = ClassS;
516 ClassMap[II] = ClassI;
517 ClassMap[WI] = ClassW;
518 ClassMap[SOpI] = ClassS;
519 ClassMap[IOpI] = ClassI;
520 ClassMap[WOpI] = ClassW;
521 ClassMap[LOpI] = ClassL;
522 ClassMap[NoTestOpI] = ClassNoTest;
523 }
524
525 // run - Emit arm_neon.h.inc
526 void run(raw_ostream &o);
527
528 // runHeader - Emit all the __builtin prototypes used in arm_neon.h
529 void runHeader(raw_ostream &o);
530
531 // runTests - Emit tests for all the Neon intrinsics.
532 void runTests(raw_ostream &o);
533 };
534
535 } // end anonymous namespace
536
537 //===----------------------------------------------------------------------===//
538 // Type implementation
539 //===----------------------------------------------------------------------===//
540
str() const541 std::string Type::str() const {
542 if (Void)
543 return "void";
544 std::string S;
545
546 if (!Signed && isInteger())
547 S += "u";
548
549 if (Poly)
550 S += "poly";
551 else if (Float)
552 S += "float";
553 else
554 S += "int";
555
556 S += utostr(ElementBitwidth);
557 if (isVector())
558 S += "x" + utostr(getNumElements());
559 if (NumVectors > 1)
560 S += "x" + utostr(NumVectors);
561 S += "_t";
562
563 if (Constant)
564 S += " const";
565 if (Pointer)
566 S += " *";
567
568 return S;
569 }
570
builtin_str() const571 std::string Type::builtin_str() const {
572 std::string S;
573 if (isVoid())
574 return "v";
575
576 if (Pointer)
577 // All pointers are void pointers.
578 S += "v";
579 else if (isInteger())
580 switch (ElementBitwidth) {
581 case 8: S += "c"; break;
582 case 16: S += "s"; break;
583 case 32: S += "i"; break;
584 case 64: S += "Wi"; break;
585 case 128: S += "LLLi"; break;
586 default: llvm_unreachable("Unhandled case!");
587 }
588 else
589 switch (ElementBitwidth) {
590 case 16: S += "h"; break;
591 case 32: S += "f"; break;
592 case 64: S += "d"; break;
593 default: llvm_unreachable("Unhandled case!");
594 }
595
596 if (isChar() && !Pointer)
597 // Make chars explicitly signed.
598 S = "S" + S;
599 else if (isInteger() && !Pointer && !Signed)
600 S = "U" + S;
601
602 if (isScalar()) {
603 if (Constant) S += "C";
604 if (Pointer) S += "*";
605 return S;
606 }
607
608 std::string Ret;
609 for (unsigned I = 0; I < NumVectors; ++I)
610 Ret += "V" + utostr(getNumElements()) + S;
611
612 return Ret;
613 }
614
getNeonEnum() const615 unsigned Type::getNeonEnum() const {
616 unsigned Addend;
617 switch (ElementBitwidth) {
618 case 8: Addend = 0; break;
619 case 16: Addend = 1; break;
620 case 32: Addend = 2; break;
621 case 64: Addend = 3; break;
622 case 128: Addend = 4; break;
623 default: llvm_unreachable("Unhandled element bitwidth!");
624 }
625
626 unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
627 if (Poly) {
628 // Adjustment needed because Poly32 doesn't exist.
629 if (Addend >= 2)
630 --Addend;
631 Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
632 }
633 if (Float) {
634 assert(Addend != 0 && "Float8 doesn't exist!");
635 Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
636 }
637
638 if (Bitwidth == 128)
639 Base |= (unsigned)NeonTypeFlags::QuadFlag;
640 if (isInteger() && !Signed)
641 Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
642
643 return Base;
644 }
645
fromTypedefName(StringRef Name)646 Type Type::fromTypedefName(StringRef Name) {
647 Type T;
648 T.Void = false;
649 T.Float = false;
650 T.Poly = false;
651
652 if (Name.front() == 'u') {
653 T.Signed = false;
654 Name = Name.drop_front();
655 } else {
656 T.Signed = true;
657 }
658
659 if (Name.startswith("float")) {
660 T.Float = true;
661 Name = Name.drop_front(5);
662 } else if (Name.startswith("poly")) {
663 T.Poly = true;
664 Name = Name.drop_front(4);
665 } else {
666 assert(Name.startswith("int"));
667 Name = Name.drop_front(3);
668 }
669
670 unsigned I = 0;
671 for (I = 0; I < Name.size(); ++I) {
672 if (!isdigit(Name[I]))
673 break;
674 }
675 Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
676 Name = Name.drop_front(I);
677
678 T.Bitwidth = T.ElementBitwidth;
679 T.NumVectors = 1;
680
681 if (Name.front() == 'x') {
682 Name = Name.drop_front();
683 unsigned I = 0;
684 for (I = 0; I < Name.size(); ++I) {
685 if (!isdigit(Name[I]))
686 break;
687 }
688 unsigned NumLanes;
689 Name.substr(0, I).getAsInteger(10, NumLanes);
690 Name = Name.drop_front(I);
691 T.Bitwidth = T.ElementBitwidth * NumLanes;
692 } else {
693 // Was scalar.
694 T.NumVectors = 0;
695 }
696 if (Name.front() == 'x') {
697 Name = Name.drop_front();
698 unsigned I = 0;
699 for (I = 0; I < Name.size(); ++I) {
700 if (!isdigit(Name[I]))
701 break;
702 }
703 Name.substr(0, I).getAsInteger(10, T.NumVectors);
704 Name = Name.drop_front(I);
705 }
706
707 assert(Name.startswith("_t") && "Malformed typedef!");
708 return T;
709 }
710
applyTypespec(bool & Quad)711 void Type::applyTypespec(bool &Quad) {
712 std::string S = TS;
713 ScalarForMangling = false;
714 Void = false;
715 Poly = Float = false;
716 ElementBitwidth = ~0U;
717 Signed = true;
718 NumVectors = 1;
719
720 for (char I : S) {
721 switch (I) {
722 case 'S':
723 ScalarForMangling = true;
724 break;
725 case 'H':
726 NoManglingQ = true;
727 Quad = true;
728 break;
729 case 'Q':
730 Quad = true;
731 break;
732 case 'P':
733 Poly = true;
734 break;
735 case 'U':
736 Signed = false;
737 break;
738 case 'c':
739 ElementBitwidth = 8;
740 break;
741 case 'h':
742 Float = true;
743 // Fall through
744 case 's':
745 ElementBitwidth = 16;
746 break;
747 case 'f':
748 Float = true;
749 // Fall through
750 case 'i':
751 ElementBitwidth = 32;
752 break;
753 case 'd':
754 Float = true;
755 // Fall through
756 case 'l':
757 ElementBitwidth = 64;
758 break;
759 case 'k':
760 ElementBitwidth = 128;
761 // Poly doesn't have a 128x1 type.
762 if (Poly)
763 NumVectors = 0;
764 break;
765 default:
766 llvm_unreachable("Unhandled type code!");
767 }
768 }
769 assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
770
771 Bitwidth = Quad ? 128 : 64;
772 }
773
applyModifier(char Mod)774 void Type::applyModifier(char Mod) {
775 bool AppliedQuad = false;
776 applyTypespec(AppliedQuad);
777
778 switch (Mod) {
779 case 'v':
780 Void = true;
781 break;
782 case 't':
783 if (Poly) {
784 Poly = false;
785 Signed = false;
786 }
787 break;
788 case 'b':
789 Signed = false;
790 Float = false;
791 Poly = false;
792 NumVectors = 0;
793 Bitwidth = ElementBitwidth;
794 break;
795 case '$':
796 Signed = true;
797 Float = false;
798 Poly = false;
799 NumVectors = 0;
800 Bitwidth = ElementBitwidth;
801 break;
802 case 'u':
803 Signed = false;
804 Poly = false;
805 Float = false;
806 break;
807 case 'x':
808 Signed = true;
809 assert(!Poly && "'u' can't be used with poly types!");
810 Float = false;
811 break;
812 case 'o':
813 Bitwidth = ElementBitwidth = 64;
814 NumVectors = 0;
815 Float = true;
816 break;
817 case 'y':
818 Bitwidth = ElementBitwidth = 32;
819 NumVectors = 0;
820 Float = true;
821 break;
822 case 'f':
823 // Special case - if we're half-precision, a floating
824 // point argument needs to be 128-bits (double size).
825 if (isHalf())
826 Bitwidth = 128;
827 Float = true;
828 ElementBitwidth = 32;
829 break;
830 case 'F':
831 Float = true;
832 ElementBitwidth = 64;
833 break;
834 case 'g':
835 if (AppliedQuad)
836 Bitwidth /= 2;
837 break;
838 case 'j':
839 if (!AppliedQuad)
840 Bitwidth *= 2;
841 break;
842 case 'w':
843 ElementBitwidth *= 2;
844 Bitwidth *= 2;
845 break;
846 case 'n':
847 ElementBitwidth *= 2;
848 break;
849 case 'i':
850 Float = false;
851 Poly = false;
852 ElementBitwidth = Bitwidth = 32;
853 NumVectors = 0;
854 Signed = true;
855 break;
856 case 'l':
857 Float = false;
858 Poly = false;
859 ElementBitwidth = Bitwidth = 64;
860 NumVectors = 0;
861 Signed = false;
862 break;
863 case 'z':
864 ElementBitwidth /= 2;
865 Bitwidth = ElementBitwidth;
866 NumVectors = 0;
867 break;
868 case 'r':
869 ElementBitwidth *= 2;
870 Bitwidth = ElementBitwidth;
871 NumVectors = 0;
872 break;
873 case 's':
874 case 'a':
875 Bitwidth = ElementBitwidth;
876 NumVectors = 0;
877 break;
878 case 'k':
879 Bitwidth *= 2;
880 break;
881 case 'c':
882 Constant = true;
883 // Fall through
884 case 'p':
885 Pointer = true;
886 Bitwidth = ElementBitwidth;
887 NumVectors = 0;
888 break;
889 case 'h':
890 ElementBitwidth /= 2;
891 break;
892 case 'q':
893 ElementBitwidth /= 2;
894 Bitwidth *= 2;
895 break;
896 case 'e':
897 ElementBitwidth /= 2;
898 Signed = false;
899 break;
900 case 'm':
901 ElementBitwidth /= 2;
902 Bitwidth /= 2;
903 break;
904 case 'd':
905 break;
906 case '2':
907 NumVectors = 2;
908 break;
909 case '3':
910 NumVectors = 3;
911 break;
912 case '4':
913 NumVectors = 4;
914 break;
915 case 'B':
916 NumVectors = 2;
917 if (!AppliedQuad)
918 Bitwidth *= 2;
919 break;
920 case 'C':
921 NumVectors = 3;
922 if (!AppliedQuad)
923 Bitwidth *= 2;
924 break;
925 case 'D':
926 NumVectors = 4;
927 if (!AppliedQuad)
928 Bitwidth *= 2;
929 break;
930 default:
931 llvm_unreachable("Unhandled character!");
932 }
933 }
934
935 //===----------------------------------------------------------------------===//
936 // Intrinsic implementation
937 //===----------------------------------------------------------------------===//
938
getInstTypeCode(Type T,ClassKind CK)939 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) {
940 char typeCode = '\0';
941 bool printNumber = true;
942
943 if (CK == ClassB)
944 return "";
945
946 if (T.isPoly())
947 typeCode = 'p';
948 else if (T.isInteger())
949 typeCode = T.isSigned() ? 's' : 'u';
950 else
951 typeCode = 'f';
952
953 if (CK == ClassI) {
954 switch (typeCode) {
955 default:
956 break;
957 case 's':
958 case 'u':
959 case 'p':
960 typeCode = 'i';
961 break;
962 }
963 }
964 if (CK == ClassB) {
965 typeCode = '\0';
966 }
967
968 std::string S;
969 if (typeCode != '\0')
970 S.push_back(typeCode);
971 if (printNumber)
972 S += utostr(T.getElementSizeInBits());
973
974 return S;
975 }
976
getBuiltinTypeStr()977 std::string Intrinsic::getBuiltinTypeStr() {
978 ClassKind LocalCK = getClassKind(true);
979 std::string S;
980
981 Type RetT = getReturnType();
982 if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
983 !RetT.isFloating())
984 RetT.makeInteger(RetT.getElementSizeInBits(), false);
985
986 // Since the return value must be one type, return a vector type of the
987 // appropriate width which we will bitcast. An exception is made for
988 // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
989 // fashion, storing them to a pointer arg.
990 if (RetT.getNumVectors() > 1) {
991 S += "vv*"; // void result with void* first argument
992 } else {
993 if (RetT.isPoly())
994 RetT.makeInteger(RetT.getElementSizeInBits(), false);
995 if (!RetT.isScalar() && !RetT.isSigned())
996 RetT.makeSigned();
997
998 bool ForcedVectorFloatingType = Proto[0] == 'F' || Proto[0] == 'f';
999 if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
1000 // Cast to vector of 8-bit elements.
1001 RetT.makeInteger(8, true);
1002
1003 S += RetT.builtin_str();
1004 }
1005
1006 for (unsigned I = 0; I < getNumParams(); ++I) {
1007 Type T = getParamType(I);
1008 if (T.isPoly())
1009 T.makeInteger(T.getElementSizeInBits(), false);
1010
1011 bool ForcedFloatingType = Proto[I + 1] == 'F' || Proto[I + 1] == 'f';
1012 if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
1013 T.makeInteger(8, true);
1014 // Halves always get converted to 8-bit elements.
1015 if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
1016 T.makeInteger(8, true);
1017
1018 if (LocalCK == ClassI)
1019 T.makeSigned();
1020
1021 // Constant indices are always just "int".
1022 if (hasImmediate() && getImmediateIdx() == I)
1023 T.makeInteger(32, true);
1024
1025 S += T.builtin_str();
1026 }
1027
1028 // Extra constant integer to hold type class enum for this function, e.g. s8
1029 if (LocalCK == ClassB)
1030 S += "i";
1031
1032 return S;
1033 }
1034
getMangledName(bool ForceClassS)1035 std::string Intrinsic::getMangledName(bool ForceClassS) {
1036 // Check if the prototype has a scalar operand with the type of the vector
1037 // elements. If not, bitcasting the args will take care of arg checking.
1038 // The actual signedness etc. will be taken care of with special enums.
1039 ClassKind LocalCK = CK;
1040 if (!protoHasScalar())
1041 LocalCK = ClassB;
1042
1043 return mangleName(Name, ForceClassS ? ClassS : LocalCK);
1044 }
1045
mangleName(std::string Name,ClassKind LocalCK)1046 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) {
1047 std::string typeCode = getInstTypeCode(BaseType, LocalCK);
1048 std::string S = Name;
1049
1050 if (Name == "vcvt_f32_f16" || Name == "vcvt_f32_f64" ||
1051 Name == "vcvt_f64_f32")
1052 return Name;
1053
1054 if (typeCode.size() > 0) {
1055 // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
1056 if (Name.size() >= 3 && isdigit(Name.back()) &&
1057 Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
1058 S.insert(S.length() - 3, "_" + typeCode);
1059 else
1060 S += "_" + typeCode;
1061 }
1062
1063 if (BaseType != InBaseType) {
1064 // A reinterpret - out the input base type at the end.
1065 S += "_" + getInstTypeCode(InBaseType, LocalCK);
1066 }
1067
1068 if (LocalCK == ClassB)
1069 S += "_v";
1070
1071 // Insert a 'q' before the first '_' character so that it ends up before
1072 // _lane or _n on vector-scalar operations.
1073 if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
1074 size_t Pos = S.find('_');
1075 S.insert(Pos, "q");
1076 }
1077
1078 char Suffix = '\0';
1079 if (BaseType.isScalarForMangling()) {
1080 switch (BaseType.getElementSizeInBits()) {
1081 case 8: Suffix = 'b'; break;
1082 case 16: Suffix = 'h'; break;
1083 case 32: Suffix = 's'; break;
1084 case 64: Suffix = 'd'; break;
1085 default: llvm_unreachable("Bad suffix!");
1086 }
1087 }
1088 if (Suffix != '\0') {
1089 size_t Pos = S.find('_');
1090 S.insert(Pos, &Suffix, 1);
1091 }
1092
1093 return S;
1094 }
1095
replaceParamsIn(std::string S)1096 std::string Intrinsic::replaceParamsIn(std::string S) {
1097 while (S.find('$') != std::string::npos) {
1098 size_t Pos = S.find('$');
1099 size_t End = Pos + 1;
1100 while (isalpha(S[End]))
1101 ++End;
1102
1103 std::string VarName = S.substr(Pos + 1, End - Pos - 1);
1104 assert_with_loc(Variables.find(VarName) != Variables.end(),
1105 "Variable not defined!");
1106 S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
1107 }
1108
1109 return S;
1110 }
1111
initVariables()1112 void Intrinsic::initVariables() {
1113 Variables.clear();
1114
1115 // Modify the TypeSpec per-argument to get a concrete Type, and create
1116 // known variables for each.
1117 for (unsigned I = 1; I < Proto.size(); ++I) {
1118 char NameC = '0' + (I - 1);
1119 std::string Name = "p";
1120 Name.push_back(NameC);
1121
1122 Variables[Name] = Variable(Types[I], Name + VariablePostfix);
1123 }
1124 RetVar = Variable(Types[0], "ret" + VariablePostfix);
1125 }
1126
emitPrototype(StringRef NamePrefix)1127 void Intrinsic::emitPrototype(StringRef NamePrefix) {
1128 if (UseMacro)
1129 OS << "#define ";
1130 else
1131 OS << "__ai " << Types[0].str() << " ";
1132
1133 OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
1134
1135 for (unsigned I = 0; I < getNumParams(); ++I) {
1136 if (I != 0)
1137 OS << ", ";
1138
1139 char NameC = '0' + I;
1140 std::string Name = "p";
1141 Name.push_back(NameC);
1142 assert(Variables.find(Name) != Variables.end());
1143 Variable &V = Variables[Name];
1144
1145 if (!UseMacro)
1146 OS << V.getType().str() << " ";
1147 OS << V.getName();
1148 }
1149
1150 OS << ")";
1151 }
1152
emitOpeningBrace()1153 void Intrinsic::emitOpeningBrace() {
1154 if (UseMacro)
1155 OS << " __extension__ ({";
1156 else
1157 OS << " {";
1158 emitNewLine();
1159 }
1160
emitClosingBrace()1161 void Intrinsic::emitClosingBrace() {
1162 if (UseMacro)
1163 OS << "})";
1164 else
1165 OS << "}";
1166 }
1167
emitNewLine()1168 void Intrinsic::emitNewLine() {
1169 if (UseMacro)
1170 OS << " \\\n";
1171 else
1172 OS << "\n";
1173 }
1174
emitReverseVariable(Variable & Dest,Variable & Src)1175 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
1176 if (Dest.getType().getNumVectors() > 1) {
1177 emitNewLine();
1178
1179 for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
1180 OS << " " << Dest.getName() << ".val[" << utostr(K) << "] = "
1181 << "__builtin_shufflevector("
1182 << Src.getName() << ".val[" << utostr(K) << "], "
1183 << Src.getName() << ".val[" << utostr(K) << "]";
1184 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
1185 OS << ", " << utostr(J);
1186 OS << ");";
1187 emitNewLine();
1188 }
1189 } else {
1190 OS << " " << Dest.getName()
1191 << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
1192 for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
1193 OS << ", " << utostr(J);
1194 OS << ");";
1195 emitNewLine();
1196 }
1197 }
1198
emitArgumentReversal()1199 void Intrinsic::emitArgumentReversal() {
1200 if (BigEndianSafe)
1201 return;
1202
1203 // Reverse all vector arguments.
1204 for (unsigned I = 0; I < getNumParams(); ++I) {
1205 std::string Name = "p" + utostr(I);
1206 std::string NewName = "rev" + utostr(I);
1207
1208 Variable &V = Variables[Name];
1209 Variable NewV(V.getType(), NewName + VariablePostfix);
1210
1211 if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
1212 continue;
1213
1214 OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
1215 emitReverseVariable(NewV, V);
1216 V = NewV;
1217 }
1218 }
1219
emitReturnReversal()1220 void Intrinsic::emitReturnReversal() {
1221 if (BigEndianSafe)
1222 return;
1223 if (!getReturnType().isVector() || getReturnType().isVoid() ||
1224 getReturnType().getNumElements() == 1)
1225 return;
1226 emitReverseVariable(RetVar, RetVar);
1227 }
1228
1229
emitShadowedArgs()1230 void Intrinsic::emitShadowedArgs() {
1231 // Macro arguments are not type-checked like inline function arguments,
1232 // so assign them to local temporaries to get the right type checking.
1233 if (!UseMacro)
1234 return;
1235
1236 for (unsigned I = 0; I < getNumParams(); ++I) {
1237 // Do not create a temporary for an immediate argument.
1238 // That would defeat the whole point of using a macro!
1239 if (hasImmediate() && Proto[I+1] == 'i')
1240 continue;
1241 // Do not create a temporary for pointer arguments. The input
1242 // pointer may have an alignment hint.
1243 if (getParamType(I).isPointer())
1244 continue;
1245
1246 std::string Name = "p" + utostr(I);
1247
1248 assert(Variables.find(Name) != Variables.end());
1249 Variable &V = Variables[Name];
1250
1251 std::string NewName = "s" + utostr(I);
1252 Variable V2(V.getType(), NewName + VariablePostfix);
1253
1254 OS << " " << V2.getType().str() << " " << V2.getName() << " = "
1255 << V.getName() << ";";
1256 emitNewLine();
1257
1258 V = V2;
1259 }
1260 }
1261
1262 // We don't check 'a' in this function, because for builtin function the
1263 // argument matching to 'a' uses a vector type splatted from a scalar type.
protoHasScalar()1264 bool Intrinsic::protoHasScalar() {
1265 return (Proto.find('s') != std::string::npos ||
1266 Proto.find('z') != std::string::npos ||
1267 Proto.find('r') != std::string::npos ||
1268 Proto.find('b') != std::string::npos ||
1269 Proto.find('$') != std::string::npos ||
1270 Proto.find('y') != std::string::npos ||
1271 Proto.find('o') != std::string::npos);
1272 }
1273
emitBodyAsBuiltinCall()1274 void Intrinsic::emitBodyAsBuiltinCall() {
1275 std::string S;
1276
1277 // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
1278 // sret-like argument.
1279 bool SRet = getReturnType().getNumVectors() >= 2;
1280
1281 StringRef N = Name;
1282 if (hasSplat()) {
1283 // Call the non-splat builtin: chop off the "_n" suffix from the name.
1284 assert(N.endswith("_n"));
1285 N = N.drop_back(2);
1286 }
1287
1288 ClassKind LocalCK = CK;
1289 if (!protoHasScalar())
1290 LocalCK = ClassB;
1291
1292 if (!getReturnType().isVoid() && !SRet)
1293 S += "(" + RetVar.getType().str() + ") ";
1294
1295 S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";
1296
1297 if (SRet)
1298 S += "&" + RetVar.getName() + ", ";
1299
1300 for (unsigned I = 0; I < getNumParams(); ++I) {
1301 Variable &V = Variables["p" + utostr(I)];
1302 Type T = V.getType();
1303
1304 // Handle multiple-vector values specially, emitting each subvector as an
1305 // argument to the builtin.
1306 if (T.getNumVectors() > 1) {
1307 // Check if an explicit cast is needed.
1308 std::string Cast;
1309 if (T.isChar() || T.isPoly() || !T.isSigned()) {
1310 Type T2 = T;
1311 T2.makeOneVector();
1312 T2.makeInteger(8, /*Signed=*/true);
1313 Cast = "(" + T2.str() + ")";
1314 }
1315
1316 for (unsigned J = 0; J < T.getNumVectors(); ++J)
1317 S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
1318 continue;
1319 }
1320
1321 std::string Arg;
1322 Type CastToType = T;
1323 if (hasSplat() && I == getSplatIdx()) {
1324 Arg = "(" + BaseType.str() + ") {";
1325 for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
1326 if (J != 0)
1327 Arg += ", ";
1328 Arg += V.getName();
1329 }
1330 Arg += "}";
1331
1332 CastToType = BaseType;
1333 } else {
1334 Arg = V.getName();
1335 }
1336
1337 // Check if an explicit cast is needed.
1338 if (CastToType.isVector()) {
1339 CastToType.makeInteger(8, true);
1340 Arg = "(" + CastToType.str() + ")" + Arg;
1341 }
1342
1343 S += Arg + ", ";
1344 }
1345
1346 // Extra constant integer to hold type class enum for this function, e.g. s8
1347 if (getClassKind(true) == ClassB) {
1348 Type ThisTy = getReturnType();
1349 if (Proto[0] == 'v' || Proto[0] == 'f' || Proto[0] == 'F')
1350 ThisTy = getParamType(0);
1351 if (ThisTy.isPointer())
1352 ThisTy = getParamType(1);
1353
1354 S += utostr(ThisTy.getNeonEnum());
1355 } else {
1356 // Remove extraneous ", ".
1357 S.pop_back();
1358 S.pop_back();
1359 }
1360 S += ");";
1361
1362 std::string RetExpr;
1363 if (!SRet && !RetVar.getType().isVoid())
1364 RetExpr = RetVar.getName() + " = ";
1365
1366 OS << " " << RetExpr << S;
1367 emitNewLine();
1368 }
1369
emitBody(StringRef CallPrefix)1370 void Intrinsic::emitBody(StringRef CallPrefix) {
1371 std::vector<std::string> Lines;
1372
1373 assert(RetVar.getType() == Types[0]);
1374 // Create a return variable, if we're not void.
1375 if (!RetVar.getType().isVoid()) {
1376 OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
1377 emitNewLine();
1378 }
1379
1380 if (!Body || Body->getValues().size() == 0) {
1381 // Nothing specific to output - must output a builtin.
1382 emitBodyAsBuiltinCall();
1383 return;
1384 }
1385
1386 // We have a list of "things to output". The last should be returned.
1387 for (auto *I : Body->getValues()) {
1388 if (StringInit *SI = dyn_cast<StringInit>(I)) {
1389 Lines.push_back(replaceParamsIn(SI->getAsString()));
1390 } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
1391 DagEmitter DE(*this, CallPrefix);
1392 Lines.push_back(DE.emitDag(DI).second + ";");
1393 }
1394 }
1395
1396 assert(!Lines.empty() && "Empty def?");
1397 if (!RetVar.getType().isVoid())
1398 Lines.back().insert(0, RetVar.getName() + " = ");
1399
1400 for (auto &L : Lines) {
1401 OS << " " << L;
1402 emitNewLine();
1403 }
1404 }
1405
emitReturn()1406 void Intrinsic::emitReturn() {
1407 if (RetVar.getType().isVoid())
1408 return;
1409 if (UseMacro)
1410 OS << " " << RetVar.getName() << ";";
1411 else
1412 OS << " return " << RetVar.getName() << ";";
1413 emitNewLine();
1414 }
1415
emitDag(DagInit * DI)1416 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
1417 // At this point we should only be seeing a def.
1418 DefInit *DefI = cast<DefInit>(DI->getOperator());
1419 std::string Op = DefI->getAsString();
1420
1421 if (Op == "cast" || Op == "bitcast")
1422 return emitDagCast(DI, Op == "bitcast");
1423 if (Op == "shuffle")
1424 return emitDagShuffle(DI);
1425 if (Op == "dup")
1426 return emitDagDup(DI);
1427 if (Op == "splat")
1428 return emitDagSplat(DI);
1429 if (Op == "save_temp")
1430 return emitDagSaveTemp(DI);
1431 if (Op == "op")
1432 return emitDagOp(DI);
1433 if (Op == "call")
1434 return emitDagCall(DI);
1435 if (Op == "name_replace")
1436 return emitDagNameReplace(DI);
1437 if (Op == "literal")
1438 return emitDagLiteral(DI);
1439 assert_with_loc(false, "Unknown operation!");
1440 return std::make_pair(Type::getVoid(), "");
1441 }
1442
emitDagOp(DagInit * DI)1443 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
1444 std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1445 if (DI->getNumArgs() == 2) {
1446 // Unary op.
1447 std::pair<Type, std::string> R =
1448 emitDagArg(DI->getArg(1), DI->getArgName(1));
1449 return std::make_pair(R.first, Op + R.second);
1450 } else {
1451 assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
1452 std::pair<Type, std::string> R1 =
1453 emitDagArg(DI->getArg(1), DI->getArgName(1));
1454 std::pair<Type, std::string> R2 =
1455 emitDagArg(DI->getArg(2), DI->getArgName(2));
1456 assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
1457 return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
1458 }
1459 }
1460
emitDagCall(DagInit * DI)1461 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
1462 std::vector<Type> Types;
1463 std::vector<std::string> Values;
1464 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1465 std::pair<Type, std::string> R =
1466 emitDagArg(DI->getArg(I + 1), DI->getArgName(I + 1));
1467 Types.push_back(R.first);
1468 Values.push_back(R.second);
1469 }
1470
1471 // Look up the called intrinsic.
1472 std::string N;
1473 if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
1474 N = SI->getAsUnquotedString();
1475 else
1476 N = emitDagArg(DI->getArg(0), "").second;
1477 Intrinsic *Callee = Intr.Emitter.getIntrinsic(N, Types);
1478 assert(Callee && "getIntrinsic should not return us nullptr!");
1479
1480 // Make sure the callee is known as an early def.
1481 Callee->setNeededEarly();
1482 Intr.Dependencies.insert(Callee);
1483
1484 // Now create the call itself.
1485 std::string S = CallPrefix.str() + Callee->getMangledName(true) + "(";
1486 for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
1487 if (I != 0)
1488 S += ", ";
1489 S += Values[I];
1490 }
1491 S += ")";
1492
1493 return std::make_pair(Callee->getReturnType(), S);
1494 }
1495
emitDagCast(DagInit * DI,bool IsBitCast)1496 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
1497 bool IsBitCast){
1498 // (cast MOD* VAL) -> cast VAL to type given by MOD.
1499 std::pair<Type, std::string> R = emitDagArg(
1500 DI->getArg(DI->getNumArgs() - 1), DI->getArgName(DI->getNumArgs() - 1));
1501 Type castToType = R.first;
1502 for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
1503
1504 // MOD can take several forms:
1505 // 1. $X - take the type of parameter / variable X.
1506 // 2. The value "R" - take the type of the return type.
1507 // 3. a type string
1508 // 4. The value "U" or "S" to switch the signedness.
1509 // 5. The value "H" or "D" to half or double the bitwidth.
1510 // 6. The value "8" to convert to 8-bit (signed) integer lanes.
1511 if (DI->getArgName(ArgIdx).size()) {
1512 assert_with_loc(Intr.Variables.find(DI->getArgName(ArgIdx)) !=
1513 Intr.Variables.end(),
1514 "Variable not found");
1515 castToType = Intr.Variables[DI->getArgName(ArgIdx)].getType();
1516 } else {
1517 StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
1518 assert_with_loc(SI, "Expected string type or $Name for cast type");
1519
1520 if (SI->getAsUnquotedString() == "R") {
1521 castToType = Intr.getReturnType();
1522 } else if (SI->getAsUnquotedString() == "U") {
1523 castToType.makeUnsigned();
1524 } else if (SI->getAsUnquotedString() == "S") {
1525 castToType.makeSigned();
1526 } else if (SI->getAsUnquotedString() == "H") {
1527 castToType.halveLanes();
1528 } else if (SI->getAsUnquotedString() == "D") {
1529 castToType.doubleLanes();
1530 } else if (SI->getAsUnquotedString() == "8") {
1531 castToType.makeInteger(8, true);
1532 } else {
1533 castToType = Type::fromTypedefName(SI->getAsUnquotedString());
1534 assert_with_loc(!castToType.isVoid(), "Unknown typedef");
1535 }
1536 }
1537 }
1538
1539 std::string S;
1540 if (IsBitCast) {
1541 // Emit a reinterpret cast. The second operand must be an lvalue, so create
1542 // a temporary.
1543 std::string N = "reint";
1544 unsigned I = 0;
1545 while (Intr.Variables.find(N) != Intr.Variables.end())
1546 N = "reint" + utostr(++I);
1547 Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
1548
1549 Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
1550 << R.second << ";";
1551 Intr.emitNewLine();
1552
1553 S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
1554 } else {
1555 // Emit a normal (static) cast.
1556 S = "(" + castToType.str() + ")(" + R.second + ")";
1557 }
1558
1559 return std::make_pair(castToType, S);
1560 }
1561
emitDagShuffle(DagInit * DI)1562 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
1563 // See the documentation in arm_neon.td for a description of these operators.
1564 class LowHalf : public SetTheory::Operator {
1565 public:
1566 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
1567 ArrayRef<SMLoc> Loc) override {
1568 SetTheory::RecSet Elts2;
1569 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
1570 Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
1571 }
1572 };
1573 class HighHalf : public SetTheory::Operator {
1574 public:
1575 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
1576 ArrayRef<SMLoc> Loc) override {
1577 SetTheory::RecSet Elts2;
1578 ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
1579 Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
1580 }
1581 };
1582 class Rev : public SetTheory::Operator {
1583 unsigned ElementSize;
1584
1585 public:
1586 Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
1587 void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
1588 ArrayRef<SMLoc> Loc) override {
1589 SetTheory::RecSet Elts2;
1590 ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);
1591
1592 int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
1593 VectorSize /= ElementSize;
1594
1595 std::vector<Record *> Revved;
1596 for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
1597 for (int LI = VectorSize - 1; LI >= 0; --LI) {
1598 Revved.push_back(Elts2[VI + LI]);
1599 }
1600 }
1601
1602 Elts.insert(Revved.begin(), Revved.end());
1603 }
1604 };
1605 class MaskExpander : public SetTheory::Expander {
1606 unsigned N;
1607
1608 public:
1609 MaskExpander(unsigned N) : N(N) {}
1610 void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
1611 unsigned Addend = 0;
1612 if (R->getName() == "mask0")
1613 Addend = 0;
1614 else if (R->getName() == "mask1")
1615 Addend = N;
1616 else
1617 return;
1618 for (unsigned I = 0; I < N; ++I)
1619 Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
1620 }
1621 };
1622
1623 // (shuffle arg1, arg2, sequence)
1624 std::pair<Type, std::string> Arg1 =
1625 emitDagArg(DI->getArg(0), DI->getArgName(0));
1626 std::pair<Type, std::string> Arg2 =
1627 emitDagArg(DI->getArg(1), DI->getArgName(1));
1628 assert_with_loc(Arg1.first == Arg2.first,
1629 "Different types in arguments to shuffle!");
1630
1631 SetTheory ST;
1632 LowHalf LH;
1633 HighHalf HH;
1634 MaskExpander ME(Arg1.first.getNumElements());
1635 Rev R(Arg1.first.getElementSizeInBits());
1636 SetTheory::RecSet Elts;
1637 ST.addOperator("lowhalf", &LH);
1638 ST.addOperator("highhalf", &HH);
1639 ST.addOperator("rev", &R);
1640 ST.addExpander("MaskExpand", &ME);
1641 ST.evaluate(DI->getArg(2), Elts, None);
1642
1643 std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
1644 for (auto &E : Elts) {
1645 StringRef Name = E->getName();
1646 assert_with_loc(Name.startswith("sv"),
1647 "Incorrect element kind in shuffle mask!");
1648 S += ", " + Name.drop_front(2).str();
1649 }
1650 S += ")";
1651
1652 // Recalculate the return type - the shuffle may have halved or doubled it.
1653 Type T(Arg1.first);
1654 if (Elts.size() > T.getNumElements()) {
1655 assert_with_loc(
1656 Elts.size() == T.getNumElements() * 2,
1657 "Can only double or half the number of elements in a shuffle!");
1658 T.doubleLanes();
1659 } else if (Elts.size() < T.getNumElements()) {
1660 assert_with_loc(
1661 Elts.size() == T.getNumElements() / 2,
1662 "Can only double or half the number of elements in a shuffle!");
1663 T.halveLanes();
1664 }
1665
1666 return std::make_pair(T, S);
1667 }
1668
emitDagDup(DagInit * DI)1669 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
1670 assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
1671 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
1672 assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
1673
1674 Type T = Intr.getBaseType();
1675 assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
1676 std::string S = "(" + T.str() + ") {";
1677 for (unsigned I = 0; I < T.getNumElements(); ++I) {
1678 if (I != 0)
1679 S += ", ";
1680 S += A.second;
1681 }
1682 S += "}";
1683
1684 return std::make_pair(T, S);
1685 }
1686
emitDagSplat(DagInit * DI)1687 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
1688 assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
1689 std::pair<Type, std::string> A = emitDagArg(DI->getArg(0), DI->getArgName(0));
1690 std::pair<Type, std::string> B = emitDagArg(DI->getArg(1), DI->getArgName(1));
1691
1692 assert_with_loc(B.first.isScalar(),
1693 "splat() requires a scalar int as the second argument");
1694
1695 std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
1696 for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
1697 S += ", " + B.second;
1698 }
1699 S += ")";
1700
1701 return std::make_pair(Intr.getBaseType(), S);
1702 }
1703
emitDagSaveTemp(DagInit * DI)1704 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
1705 assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
1706 std::pair<Type, std::string> A = emitDagArg(DI->getArg(1), DI->getArgName(1));
1707
1708 assert_with_loc(!A.first.isVoid(),
1709 "Argument to save_temp() must have non-void type!");
1710
1711 std::string N = DI->getArgName(0);
1712 assert_with_loc(N.size(), "save_temp() expects a name as the first argument");
1713
1714 assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
1715 "Variable already defined!");
1716 Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
1717
1718 std::string S =
1719 A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
1720
1721 return std::make_pair(Type::getVoid(), S);
1722 }
1723
1724 std::pair<Type, std::string>
emitDagNameReplace(DagInit * DI)1725 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
1726 std::string S = Intr.Name;
1727
1728 assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
1729 std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1730 std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
1731
1732 size_t Idx = S.find(ToReplace);
1733
1734 assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
1735 S.replace(Idx, ToReplace.size(), ReplaceWith);
1736
1737 return std::make_pair(Type::getVoid(), S);
1738 }
1739
emitDagLiteral(DagInit * DI)1740 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
1741 std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
1742 std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
1743 return std::make_pair(Type::fromTypedefName(Ty), Value);
1744 }
1745
1746 std::pair<Type, std::string>
emitDagArg(Init * Arg,std::string ArgName)1747 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
1748 if (ArgName.size()) {
1749 assert_with_loc(!Arg->isComplete(),
1750 "Arguments must either be DAGs or names, not both!");
1751 assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
1752 "Variable not defined!");
1753 Variable &V = Intr.Variables[ArgName];
1754 return std::make_pair(V.getType(), V.getName());
1755 }
1756
1757 assert(Arg && "Neither ArgName nor Arg?!");
1758 DagInit *DI = dyn_cast<DagInit>(Arg);
1759 assert_with_loc(DI, "Arguments must either be DAGs or names!");
1760
1761 return emitDag(DI);
1762 }
1763
generate()1764 std::string Intrinsic::generate() {
1765 // Little endian intrinsics are simple and don't require any argument
1766 // swapping.
1767 OS << "#ifdef __LITTLE_ENDIAN__\n";
1768
1769 generateImpl(false, "", "");
1770
1771 OS << "#else\n";
1772
1773 // Big endian intrinsics are more complex. The user intended these
1774 // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
1775 // but we load as-if (V)LD1. So we should swap all arguments and
1776 // swap the return value too.
1777 //
1778 // If we call sub-intrinsics, we should call a version that does
1779 // not re-swap the arguments!
1780 generateImpl(true, "", "__noswap_");
1781
1782 // If we're needed early, create a non-swapping variant for
1783 // big-endian.
1784 if (NeededEarly) {
1785 generateImpl(false, "__noswap_", "__noswap_");
1786 }
1787 OS << "#endif\n\n";
1788
1789 return OS.str();
1790 }
1791
generateImpl(bool ReverseArguments,StringRef NamePrefix,StringRef CallPrefix)1792 void Intrinsic::generateImpl(bool ReverseArguments,
1793 StringRef NamePrefix, StringRef CallPrefix) {
1794 CurrentRecord = R;
1795
1796 // If we call a macro, our local variables may be corrupted due to
1797 // lack of proper lexical scoping. So, add a globally unique postfix
1798 // to every variable.
1799 //
1800 // indexBody() should have set up the Dependencies set by now.
1801 for (auto *I : Dependencies)
1802 if (I->UseMacro) {
1803 VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
1804 break;
1805 }
1806
1807 initVariables();
1808
1809 emitPrototype(NamePrefix);
1810
1811 if (IsUnavailable) {
1812 OS << " __attribute__((unavailable));";
1813 } else {
1814 emitOpeningBrace();
1815 emitShadowedArgs();
1816 if (ReverseArguments)
1817 emitArgumentReversal();
1818 emitBody(CallPrefix);
1819 if (ReverseArguments)
1820 emitReturnReversal();
1821 emitReturn();
1822 emitClosingBrace();
1823 }
1824 OS << "\n";
1825
1826 CurrentRecord = nullptr;
1827 }
1828
indexBody()1829 void Intrinsic::indexBody() {
1830 CurrentRecord = R;
1831
1832 initVariables();
1833 emitBody("");
1834 OS.str("");
1835
1836 CurrentRecord = nullptr;
1837 }
1838
1839 //===----------------------------------------------------------------------===//
1840 // NeonEmitter implementation
1841 //===----------------------------------------------------------------------===//
1842
getIntrinsic(StringRef Name,ArrayRef<Type> Types)1843 Intrinsic *NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
1844 // First, look up the name in the intrinsic map.
1845 assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
1846 ("Intrinsic '" + Name + "' not found!").str());
1847 std::vector<Intrinsic *> &V = IntrinsicMap[Name.str()];
1848 std::vector<Intrinsic *> GoodVec;
1849
1850 // Create a string to print if we end up failing.
1851 std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
1852 for (unsigned I = 0; I < Types.size(); ++I) {
1853 if (I != 0)
1854 ErrMsg += ", ";
1855 ErrMsg += Types[I].str();
1856 }
1857 ErrMsg += ")'\n";
1858 ErrMsg += "Available overloads:\n";
1859
1860 // Now, look through each intrinsic implementation and see if the types are
1861 // compatible.
1862 for (auto *I : V) {
1863 ErrMsg += " - " + I->getReturnType().str() + " " + I->getMangledName();
1864 ErrMsg += "(";
1865 for (unsigned A = 0; A < I->getNumParams(); ++A) {
1866 if (A != 0)
1867 ErrMsg += ", ";
1868 ErrMsg += I->getParamType(A).str();
1869 }
1870 ErrMsg += ")\n";
1871
1872 if (I->getNumParams() != Types.size())
1873 continue;
1874
1875 bool Good = true;
1876 for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
1877 if (I->getParamType(Arg) != Types[Arg]) {
1878 Good = false;
1879 break;
1880 }
1881 }
1882 if (Good)
1883 GoodVec.push_back(I);
1884 }
1885
1886 assert_with_loc(GoodVec.size() > 0,
1887 "No compatible intrinsic found - " + ErrMsg);
1888 assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
1889
1890 return GoodVec.front();
1891 }
1892
createIntrinsic(Record * R,SmallVectorImpl<Intrinsic * > & Out)1893 void NeonEmitter::createIntrinsic(Record *R,
1894 SmallVectorImpl<Intrinsic *> &Out) {
1895 std::string Name = R->getValueAsString("Name");
1896 std::string Proto = R->getValueAsString("Prototype");
1897 std::string Types = R->getValueAsString("Types");
1898 Record *OperationRec = R->getValueAsDef("Operation");
1899 bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
1900 bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
1901 std::string Guard = R->getValueAsString("ArchGuard");
1902 bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
1903
1904 // Set the global current record. This allows assert_with_loc to produce
1905 // decent location information even when highly nested.
1906 CurrentRecord = R;
1907
1908 ListInit *Body = OperationRec->getValueAsListInit("Ops");
1909
1910 std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
1911
1912 ClassKind CK = ClassNone;
1913 if (R->getSuperClasses().size() >= 2)
1914 CK = ClassMap[R->getSuperClasses()[1]];
1915
1916 std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
1917 for (auto TS : TypeSpecs) {
1918 if (CartesianProductOfTypes) {
1919 Type DefaultT(TS, 'd');
1920 for (auto SrcTS : TypeSpecs) {
1921 Type DefaultSrcT(SrcTS, 'd');
1922 if (TS == SrcTS ||
1923 DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
1924 continue;
1925 NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
1926 }
1927 } else {
1928 NewTypeSpecs.push_back(std::make_pair(TS, TS));
1929 }
1930 }
1931
1932 std::sort(NewTypeSpecs.begin(), NewTypeSpecs.end());
1933 NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
1934 NewTypeSpecs.end());
1935
1936 for (auto &I : NewTypeSpecs) {
1937 Intrinsic *IT = new Intrinsic(R, Name, Proto, I.first, I.second, CK, Body,
1938 *this, Guard, IsUnavailable, BigEndianSafe);
1939
1940 IntrinsicMap[Name].push_back(IT);
1941 Out.push_back(IT);
1942 }
1943
1944 CurrentRecord = nullptr;
1945 }
1946
1947 /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
1948 /// declaration of builtins, checking for unique builtin declarations.
genBuiltinsDef(raw_ostream & OS,SmallVectorImpl<Intrinsic * > & Defs)1949 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
1950 SmallVectorImpl<Intrinsic *> &Defs) {
1951 OS << "#ifdef GET_NEON_BUILTINS\n";
1952
1953 // We only want to emit a builtin once, and we want to emit them in
1954 // alphabetical order, so use a std::set.
1955 std::set<std::string> Builtins;
1956
1957 for (auto *Def : Defs) {
1958 if (Def->hasBody())
1959 continue;
1960 // Functions with 'a' (the splat code) in the type prototype should not get
1961 // their own builtin as they use the non-splat variant.
1962 if (Def->hasSplat())
1963 continue;
1964
1965 std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
1966
1967 S += Def->getBuiltinTypeStr();
1968 S += "\", \"n\")";
1969
1970 Builtins.insert(S);
1971 }
1972
1973 for (auto &S : Builtins)
1974 OS << S << "\n";
1975 OS << "#endif\n\n";
1976 }
1977
1978 /// Generate the ARM and AArch64 overloaded type checking code for
1979 /// SemaChecking.cpp, checking for unique builtin declarations.
genOverloadTypeCheckCode(raw_ostream & OS,SmallVectorImpl<Intrinsic * > & Defs)1980 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
1981 SmallVectorImpl<Intrinsic *> &Defs) {
1982 OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
1983
1984 // We record each overload check line before emitting because subsequent Inst
1985 // definitions may extend the number of permitted types (i.e. augment the
1986 // Mask). Use std::map to avoid sorting the table by hash number.
1987 struct OverloadInfo {
1988 uint64_t Mask;
1989 int PtrArgNum;
1990 bool HasConstPtr;
1991 OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
1992 };
1993 std::map<std::string, OverloadInfo> OverloadMap;
1994
1995 for (auto *Def : Defs) {
1996 // If the def has a body (that is, it has Operation DAGs), it won't call
1997 // __builtin_neon_* so we don't need to generate a definition for it.
1998 if (Def->hasBody())
1999 continue;
2000 // Functions with 'a' (the splat code) in the type prototype should not get
2001 // their own builtin as they use the non-splat variant.
2002 if (Def->hasSplat())
2003 continue;
2004 // Functions which have a scalar argument cannot be overloaded, no need to
2005 // check them if we are emitting the type checking code.
2006 if (Def->protoHasScalar())
2007 continue;
2008
2009 uint64_t Mask = 0ULL;
2010 Type Ty = Def->getReturnType();
2011 if (Def->getProto()[0] == 'v' || Def->getProto()[0] == 'f' ||
2012 Def->getProto()[0] == 'F')
2013 Ty = Def->getParamType(0);
2014 if (Ty.isPointer())
2015 Ty = Def->getParamType(1);
2016
2017 Mask |= 1ULL << Ty.getNeonEnum();
2018
2019 // Check if the function has a pointer or const pointer argument.
2020 std::string Proto = Def->getProto();
2021 int PtrArgNum = -1;
2022 bool HasConstPtr = false;
2023 for (unsigned I = 0; I < Def->getNumParams(); ++I) {
2024 char ArgType = Proto[I + 1];
2025 if (ArgType == 'c') {
2026 HasConstPtr = true;
2027 PtrArgNum = I;
2028 break;
2029 }
2030 if (ArgType == 'p') {
2031 PtrArgNum = I;
2032 break;
2033 }
2034 }
2035 // For sret builtins, adjust the pointer argument index.
2036 if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
2037 PtrArgNum += 1;
2038
2039 std::string Name = Def->getName();
2040 // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
2041 // and vst1_lane intrinsics. Using a pointer to the vector element
2042 // type with one of those operations causes codegen to select an aligned
2043 // load/store instruction. If you want an unaligned operation,
2044 // the pointer argument needs to have less alignment than element type,
2045 // so just accept any pointer type.
2046 if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
2047 PtrArgNum = -1;
2048 HasConstPtr = false;
2049 }
2050
2051 if (Mask) {
2052 std::string Name = Def->getMangledName();
2053 OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
2054 OverloadInfo &OI = OverloadMap[Name];
2055 OI.Mask |= Mask;
2056 OI.PtrArgNum |= PtrArgNum;
2057 OI.HasConstPtr = HasConstPtr;
2058 }
2059 }
2060
2061 for (auto &I : OverloadMap) {
2062 OverloadInfo &OI = I.second;
2063
2064 OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
2065 OS << "mask = 0x" << utohexstr(OI.Mask) << "ULL";
2066 if (OI.PtrArgNum >= 0)
2067 OS << "; PtrArgNum = " << OI.PtrArgNum;
2068 if (OI.HasConstPtr)
2069 OS << "; HasConstPtr = true";
2070 OS << "; break;\n";
2071 }
2072 OS << "#endif\n\n";
2073 }
2074
2075 void
genIntrinsicRangeCheckCode(raw_ostream & OS,SmallVectorImpl<Intrinsic * > & Defs)2076 NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
2077 SmallVectorImpl<Intrinsic *> &Defs) {
2078 OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
2079
2080 std::set<std::string> Emitted;
2081
2082 for (auto *Def : Defs) {
2083 if (Def->hasBody())
2084 continue;
2085 // Functions with 'a' (the splat code) in the type prototype should not get
2086 // their own builtin as they use the non-splat variant.
2087 if (Def->hasSplat())
2088 continue;
2089 // Functions which do not have an immediate do not need to have range
2090 // checking code emitted.
2091 if (!Def->hasImmediate())
2092 continue;
2093 if (Emitted.find(Def->getMangledName()) != Emitted.end())
2094 continue;
2095
2096 std::string LowerBound, UpperBound;
2097
2098 Record *R = Def->getRecord();
2099 if (R->getValueAsBit("isVCVT_N")) {
2100 // VCVT between floating- and fixed-point values takes an immediate
2101 // in the range [1, 32) for f32 or [1, 64) for f64.
2102 LowerBound = "1";
2103 if (Def->getBaseType().getElementSizeInBits() == 32)
2104 UpperBound = "31";
2105 else
2106 UpperBound = "63";
2107 } else if (R->getValueAsBit("isScalarShift")) {
2108 // Right shifts have an 'r' in the name, left shifts do not. Convert
2109 // instructions have the same bounds and right shifts.
2110 if (Def->getName().find('r') != std::string::npos ||
2111 Def->getName().find("cvt") != std::string::npos)
2112 LowerBound = "1";
2113
2114 UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
2115 } else if (R->getValueAsBit("isShift")) {
2116 // Builtins which are overloaded by type will need to have their upper
2117 // bound computed at Sema time based on the type constant.
2118
2119 // Right shifts have an 'r' in the name, left shifts do not.
2120 if (Def->getName().find('r') != std::string::npos)
2121 LowerBound = "1";
2122 UpperBound = "RFT(TV, true)";
2123 } else if (Def->getClassKind(true) == ClassB) {
2124 // ClassB intrinsics have a type (and hence lane number) that is only
2125 // known at runtime.
2126 if (R->getValueAsBit("isLaneQ"))
2127 UpperBound = "RFT(TV, false, true)";
2128 else
2129 UpperBound = "RFT(TV, false, false)";
2130 } else {
2131 // The immediate generally refers to a lane in the preceding argument.
2132 assert(Def->getImmediateIdx() > 0);
2133 Type T = Def->getParamType(Def->getImmediateIdx() - 1);
2134 UpperBound = utostr(T.getNumElements() - 1);
2135 }
2136
2137 // Calculate the index of the immediate that should be range checked.
2138 unsigned Idx = Def->getNumParams();
2139 if (Def->hasImmediate())
2140 Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
2141
2142 OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
2143 << "i = " << Idx << ";";
2144 if (LowerBound.size())
2145 OS << " l = " << LowerBound << ";";
2146 if (UpperBound.size())
2147 OS << " u = " << UpperBound << ";";
2148 OS << " break;\n";
2149
2150 Emitted.insert(Def->getMangledName());
2151 }
2152
2153 OS << "#endif\n\n";
2154 }
2155
2156 /// runHeader - Emit a file with sections defining:
2157 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
2158 /// 2. the SemaChecking code for the type overload checking.
2159 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
runHeader(raw_ostream & OS)2160 void NeonEmitter::runHeader(raw_ostream &OS) {
2161 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2162
2163 SmallVector<Intrinsic *, 128> Defs;
2164 for (auto *R : RV)
2165 createIntrinsic(R, Defs);
2166
2167 // Generate shared BuiltinsXXX.def
2168 genBuiltinsDef(OS, Defs);
2169
2170 // Generate ARM overloaded type checking code for SemaChecking.cpp
2171 genOverloadTypeCheckCode(OS, Defs);
2172
2173 // Generate ARM range checking code for shift/lane immediates.
2174 genIntrinsicRangeCheckCode(OS, Defs);
2175 }
2176
2177 /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
2178 /// is comprised of type definitions and function declarations.
run(raw_ostream & OS)2179 void NeonEmitter::run(raw_ostream &OS) {
2180 OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
2181 "------------------------------"
2182 "---===\n"
2183 " *\n"
2184 " * Permission is hereby granted, free of charge, to any person "
2185 "obtaining "
2186 "a copy\n"
2187 " * of this software and associated documentation files (the "
2188 "\"Software\"),"
2189 " to deal\n"
2190 " * in the Software without restriction, including without limitation "
2191 "the "
2192 "rights\n"
2193 " * to use, copy, modify, merge, publish, distribute, sublicense, "
2194 "and/or sell\n"
2195 " * copies of the Software, and to permit persons to whom the Software "
2196 "is\n"
2197 " * furnished to do so, subject to the following conditions:\n"
2198 " *\n"
2199 " * The above copyright notice and this permission notice shall be "
2200 "included in\n"
2201 " * all copies or substantial portions of the Software.\n"
2202 " *\n"
2203 " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
2204 "EXPRESS OR\n"
2205 " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
2206 "MERCHANTABILITY,\n"
2207 " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
2208 "SHALL THE\n"
2209 " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
2210 "OTHER\n"
2211 " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
2212 "ARISING FROM,\n"
2213 " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
2214 "DEALINGS IN\n"
2215 " * THE SOFTWARE.\n"
2216 " *\n"
2217 " *===-----------------------------------------------------------------"
2218 "---"
2219 "---===\n"
2220 " */\n\n";
2221
2222 OS << "#ifndef __ARM_NEON_H\n";
2223 OS << "#define __ARM_NEON_H\n\n";
2224
2225 OS << "#if !defined(__ARM_NEON)\n";
2226 OS << "#error \"NEON support not enabled\"\n";
2227 OS << "#endif\n\n";
2228
2229 OS << "#include <stdint.h>\n\n";
2230
2231 // Emit NEON-specific scalar typedefs.
2232 OS << "typedef float float32_t;\n";
2233 OS << "typedef __fp16 float16_t;\n";
2234
2235 OS << "#ifdef __aarch64__\n";
2236 OS << "typedef double float64_t;\n";
2237 OS << "#endif\n\n";
2238
2239 // For now, signedness of polynomial types depends on target
2240 OS << "#ifdef __aarch64__\n";
2241 OS << "typedef uint8_t poly8_t;\n";
2242 OS << "typedef uint16_t poly16_t;\n";
2243 OS << "typedef uint64_t poly64_t;\n";
2244 OS << "typedef __uint128_t poly128_t;\n";
2245 OS << "#else\n";
2246 OS << "typedef int8_t poly8_t;\n";
2247 OS << "typedef int16_t poly16_t;\n";
2248 OS << "#endif\n";
2249
2250 // Emit Neon vector typedefs.
2251 std::string TypedefTypes(
2252 "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
2253 std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
2254
2255 // Emit vector typedefs.
2256 bool InIfdef = false;
2257 for (auto &TS : TDTypeVec) {
2258 bool IsA64 = false;
2259 Type T(TS, 'd');
2260 if (T.isDouble() || (T.isPoly() && T.isLong()))
2261 IsA64 = true;
2262
2263 if (InIfdef && !IsA64) {
2264 OS << "#endif\n";
2265 InIfdef = false;
2266 }
2267 if (!InIfdef && IsA64) {
2268 OS << "#ifdef __aarch64__\n";
2269 InIfdef = true;
2270 }
2271
2272 if (T.isPoly())
2273 OS << "typedef __attribute__((neon_polyvector_type(";
2274 else
2275 OS << "typedef __attribute__((neon_vector_type(";
2276
2277 Type T2 = T;
2278 T2.makeScalar();
2279 OS << utostr(T.getNumElements()) << "))) ";
2280 OS << T2.str();
2281 OS << " " << T.str() << ";\n";
2282 }
2283 if (InIfdef)
2284 OS << "#endif\n";
2285 OS << "\n";
2286
2287 // Emit struct typedefs.
2288 InIfdef = false;
2289 for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
2290 for (auto &TS : TDTypeVec) {
2291 bool IsA64 = false;
2292 Type T(TS, 'd');
2293 if (T.isDouble() || (T.isPoly() && T.isLong()))
2294 IsA64 = true;
2295
2296 if (InIfdef && !IsA64) {
2297 OS << "#endif\n";
2298 InIfdef = false;
2299 }
2300 if (!InIfdef && IsA64) {
2301 OS << "#ifdef __aarch64__\n";
2302 InIfdef = true;
2303 }
2304
2305 char M = '2' + (NumMembers - 2);
2306 Type VT(TS, M);
2307 OS << "typedef struct " << VT.str() << " {\n";
2308 OS << " " << T.str() << " val";
2309 OS << "[" << utostr(NumMembers) << "]";
2310 OS << ";\n} ";
2311 OS << VT.str() << ";\n";
2312 OS << "\n";
2313 }
2314 }
2315 if (InIfdef)
2316 OS << "#endif\n";
2317 OS << "\n";
2318
2319 OS << "#define __ai static inline __attribute__((__always_inline__, "
2320 "__nodebug__))\n\n";
2321
2322 SmallVector<Intrinsic *, 128> Defs;
2323 std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
2324 for (auto *R : RV)
2325 createIntrinsic(R, Defs);
2326
2327 for (auto *I : Defs)
2328 I->indexBody();
2329
2330 std::stable_sort(
2331 Defs.begin(), Defs.end(),
2332 [](const Intrinsic *A, const Intrinsic *B) { return *A < *B; });
2333
2334 // Only emit a def when its requirements have been met.
2335 // FIXME: This loop could be made faster, but it's fast enough for now.
2336 bool MadeProgress = true;
2337 std::string InGuard = "";
2338 while (!Defs.empty() && MadeProgress) {
2339 MadeProgress = false;
2340
2341 for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
2342 I != Defs.end(); /*No step*/) {
2343 bool DependenciesSatisfied = true;
2344 for (auto *II : (*I)->getDependencies()) {
2345 if (std::find(Defs.begin(), Defs.end(), II) != Defs.end())
2346 DependenciesSatisfied = false;
2347 }
2348 if (!DependenciesSatisfied) {
2349 // Try the next one.
2350 ++I;
2351 continue;
2352 }
2353
2354 // Emit #endif/#if pair if needed.
2355 if ((*I)->getGuard() != InGuard) {
2356 if (!InGuard.empty())
2357 OS << "#endif\n";
2358 InGuard = (*I)->getGuard();
2359 if (!InGuard.empty())
2360 OS << "#if " << InGuard << "\n";
2361 }
2362
2363 // Actually generate the intrinsic code.
2364 OS << (*I)->generate();
2365
2366 MadeProgress = true;
2367 I = Defs.erase(I);
2368 }
2369 }
2370 assert(Defs.empty() && "Some requirements were not satisfied!");
2371 if (!InGuard.empty())
2372 OS << "#endif\n";
2373
2374 OS << "\n";
2375 OS << "#undef __ai\n\n";
2376 OS << "#endif /* __ARM_NEON_H */\n";
2377 }
2378
2379 namespace clang {
EmitNeon(RecordKeeper & Records,raw_ostream & OS)2380 void EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
2381 NeonEmitter(Records).run(OS);
2382 }
EmitNeonSema(RecordKeeper & Records,raw_ostream & OS)2383 void EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
2384 NeonEmitter(Records).runHeader(OS);
2385 }
EmitNeonTest(RecordKeeper & Records,raw_ostream & OS)2386 void EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
2387 llvm_unreachable("Neon test generation no longer implemented!");
2388 }
2389 } // End namespace clang
2390