1 /*
2  * Copyright 2012, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "Assert.h"
18 #include "Log.h"
19 #include "RSTransforms.h"
20 #include "RSUtils.h"
21 
22 #include "bcc/Config.h"
23 #include "bcinfo/MetadataExtractor.h"
24 
25 #include "slang_version.h"
26 
27 #include <cstdlib>
28 #include <functional>
29 #include <unordered_set>
30 
31 #include <llvm/IR/DerivedTypes.h>
32 #include <llvm/IR/Function.h>
33 #include <llvm/IR/Instructions.h>
34 #include <llvm/IR/IRBuilder.h>
35 #include <llvm/IR/MDBuilder.h>
36 #include <llvm/IR/Module.h>
37 #include <llvm/Pass.h>
38 #include <llvm/Support/raw_ostream.h>
39 #include <llvm/IR/DataLayout.h>
40 #include <llvm/IR/Function.h>
41 #include <llvm/IR/Type.h>
42 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
43 
#ifndef __DISABLE_ASSERTS
// Expected parameter counts of the generated ".expand" functions. They are
// referenced only from bccAssert() checks, hence the guard.
const int kNumExpandedForeachParams{4};
const int kNumExpandedReduceAccumulatorParams{4};
#endif

// String names for the RenderScript TBAA root and node.
const char kRenderScriptTBAARootName[] = "RenderScript Distinct TBAA";
const char kRenderScriptTBAANodeName[] = "RenderScript TBAA";
52 
53 using namespace bcc;
54 
55 namespace {
56 
// When true, loads generated by this pass are tagged with "tbaa" metadata.
static const bool gEnableRsTbaa{true};
58 
59 /* RSKernelExpandPass
60  *
61  * This pass generates functions used to implement calls via
62  * rsForEach(), "foreach_<NAME>", or "reduce_<NAME>". We create an
63  * inner loop for the function to be invoked over the appropriate data
64  * cells of the input/output allocations (adjusting other relevant
65  * parameters as we go). We support doing this for any forEach or
66  * reduce style compute kernels.
67  *
68  * In the case of a foreach kernel or a simple reduction kernel, the
69  * new function name is the original function name "<NAME>" followed
70  * by ".expand" -- "<NAME>.expand".
71  *
72  * In the case of a general reduction kernel, the kernel's accumulator
73  * function is the one transformed, and the new function name is the
74  * original accumulator function name "<ACCUMFN>" followed by
75  * ".expand" -- "<ACCUMFN>.expand". Using the name "<ACCUMFN>.expand"
76  * for the function generated from the accumulator should not
77  * introduce any possibility for name clashes today: The accumulator
78  * function <ACCUMFN> must be static, so it cannot also serve as a
79  * foreach kernel; and the code for <ACCUMFN>.expand depends only on
80  * <ACCUMFN>, not on any other properties of the reduction kernel, so
81  * any reduction kernels that share the accumulator <ACCUMFN> can
82  * share <ACCUMFN>.expand also.
83  *
84  * Note that this pass does not delete the original function <NAME> or
85  * <ACCUMFN>. However, if it is inlined into the newly-generated
86  * function and not otherwise referenced, then a subsequent pass may
87  * delete it.
88  */
89 class RSKernelExpandPass : public llvm::ModulePass {
90 public:
91   static char ID;
92 
93 private:
94   static const size_t RS_KERNEL_INPUT_LIMIT = 8;  // see frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h
95 
96   typedef std::unordered_set<llvm::Function *> FunctionSet;
97 
// Field indices into the RsLaunchDimensions struct type. The values are used
// as GEP indices, so they must follow the order of the struct's fields.
enum RsLaunchDimensionsField {
  RsLaunchDimensionsFieldX     = 0,
  RsLaunchDimensionsFieldY     = 1,
  RsLaunchDimensionsFieldZ     = 2,
  RsLaunchDimensionsFieldLod   = 3,
  RsLaunchDimensionsFieldFace  = 4,
  RsLaunchDimensionsFieldArray = 5,

  // Number of fields; not itself a field index.
  RsLaunchDimensionsFieldCount = 6
};
108 
// Field indices into the RsExpandKernelDriverInfoPfx struct type. The values
// are used as GEP indices, so they must follow the order of the struct's
// fields.
enum RsExpandKernelDriverInfoPfxField {
  RsExpandKernelDriverInfoPfxFieldInPtr     = 0,
  RsExpandKernelDriverInfoPfxFieldInStride  = 1,
  RsExpandKernelDriverInfoPfxFieldInLen     = 2,
  RsExpandKernelDriverInfoPfxFieldOutPtr    = 3,
  RsExpandKernelDriverInfoPfxFieldOutStride = 4,
  RsExpandKernelDriverInfoPfxFieldOutLen    = 5,
  RsExpandKernelDriverInfoPfxFieldDim       = 6,
  RsExpandKernelDriverInfoPfxFieldCurrent   = 7,
  RsExpandKernelDriverInfoPfxFieldUsr       = 8,
  // Index of the usrLen field. NOTE(review): the identifier looks like a
  // misspelling of "UsrLen"; it is left unchanged in case it is referenced
  // elsewhere.
  RsExpandKernelDriverInfoPfxFieldUsLenr    = 9,

  // Number of fields; not itself a field index.
  RsExpandKernelDriverInfoPfxFieldCount     = 10
};
123 
124   llvm::Module *Module;
125   llvm::LLVMContext *Context;
126 
127   /*
128    * Pointers to LLVM type information for the function signatures
129    * of expanded functions. These must be re-calculated for each module
130    * the pass is run on.
131    */
132   llvm::FunctionType *ExpandedForEachType;
133   llvm::Type *RsExpandKernelDriverInfoPfxTy;
134 
135   // Initialized when we begin to process each Module
136   bool mStructExplicitlyPaddedBySlang;
137   uint32_t mExportForEachCount;
138   const char **mExportForEachNameList;
139   const uint32_t *mExportForEachSignatureList;
140 
141   // Turns on optimization of allocation stride values.
142   bool mEnableStepOpt;
143 
getRootSignature(llvm::Function * Function)144   uint32_t getRootSignature(llvm::Function *Function) {
145     const llvm::NamedMDNode *ExportForEachMetadata =
146         Module->getNamedMetadata("#rs_export_foreach");
147 
148     if (!ExportForEachMetadata) {
149       llvm::SmallVector<llvm::Type*, 8> RootArgTys;
150       for (llvm::Function::arg_iterator B = Function->arg_begin(),
151                                         E = Function->arg_end();
152            B != E;
153            ++B) {
154         RootArgTys.push_back(B->getType());
155       }
156 
157       // For pre-ICS bitcode, we may not have signature information. In that
158       // case, we use the size of the RootArgTys to select the number of
159       // arguments.
160       return (1 << RootArgTys.size()) - 1;
161     }
162 
163     if (ExportForEachMetadata->getNumOperands() == 0) {
164       return 0;
165     }
166 
167     bccAssert(ExportForEachMetadata->getNumOperands() > 0);
168 
169     // We only handle the case for legacy root() functions here, so this is
170     // hard-coded to look at only the first such function.
171     llvm::MDNode *SigNode = ExportForEachMetadata->getOperand(0);
172     if (SigNode != nullptr && SigNode->getNumOperands() == 1) {
173       llvm::Metadata *SigMD = SigNode->getOperand(0);
174       if (llvm::MDString *SigS = llvm::dyn_cast<llvm::MDString>(SigMD)) {
175         llvm::StringRef SigString = SigS->getString();
176         uint32_t Signature = 0;
177         if (SigString.getAsInteger(10, Signature)) {
178           ALOGE("Non-integer signature value '%s'", SigString.str().c_str());
179           return 0;
180         }
181         return Signature;
182       }
183     }
184 
185     return 0;
186   }
187 
isStepOptSupported(llvm::Type * AllocType)188   bool isStepOptSupported(llvm::Type *AllocType) {
189 
190     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
191     llvm::Type *VoidPtrTy = llvm::Type::getInt8PtrTy(*Context);
192 
193     if (mEnableStepOpt) {
194       return false;
195     }
196 
197     if (AllocType == VoidPtrTy) {
198       return false;
199     }
200 
201     if (!PT) {
202       return false;
203     }
204 
205     // remaining conditions are 64-bit only
206     if (VoidPtrTy->getPrimitiveSizeInBits() == 32) {
207       return true;
208     }
209 
210     // coerce suggests an upconverted struct type, which we can't support
211     if (AllocType->getStructName().find("coerce") != llvm::StringRef::npos) {
212       return false;
213     }
214 
215     // 2xi64 and i128 suggest an upconverted struct type, which are also unsupported
216     llvm::Type *V2xi64Ty = llvm::VectorType::get(llvm::Type::getInt64Ty(*Context), 2);
217     llvm::Type *Int128Ty = llvm::Type::getIntNTy(*Context, 128);
218     if (AllocType == V2xi64Ty || AllocType == Int128Ty) {
219       return false;
220     }
221 
222     return true;
223   }
224 
225   // Get the actual value we should use to step through an allocation.
226   //
227   // Normally the value we use to step through an allocation is given to us by
228   // the driver. However, for certain primitive data types, we can derive an
229   // integer constant for the step value. We use this integer constant whenever
230   // possible to allow further compiler optimizations to take place.
231   //
232   // DL - Target Data size/layout information.
233   // T - Type of allocation (should be a pointer).
234   // OrigStep - Original step increment (root.expand() input from driver).
getStepValue(llvm::DataLayout * DL,llvm::Type * AllocType,llvm::Value * OrigStep)235   llvm::Value *getStepValue(llvm::DataLayout *DL, llvm::Type *AllocType,
236                             llvm::Value *OrigStep) {
237     bccAssert(DL);
238     bccAssert(AllocType);
239     bccAssert(OrigStep);
240     llvm::PointerType *PT = llvm::dyn_cast<llvm::PointerType>(AllocType);
241     if (isStepOptSupported(AllocType)) {
242       llvm::Type *ET = PT->getElementType();
243       uint64_t ETSize = DL->getTypeAllocSize(ET);
244       llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
245       return llvm::ConstantInt::get(Int32Ty, ETSize);
246     } else {
247       return OrigStep;
248     }
249   }
250 
251   /// Builds the types required by the pass for the given context.
buildTypes(void)252   void buildTypes(void) {
253     // Create the RsLaunchDimensionsTy and RsExpandKernelDriverInfoPfxTy structs.
254 
255     llvm::Type *Int8Ty                   = llvm::Type::getInt8Ty(*Context);
256     llvm::Type *Int8PtrTy                = Int8Ty->getPointerTo();
257     llvm::Type *Int8PtrArrayInputLimitTy = llvm::ArrayType::get(Int8PtrTy, RS_KERNEL_INPUT_LIMIT);
258     llvm::Type *Int32Ty                  = llvm::Type::getInt32Ty(*Context);
259     llvm::Type *Int32ArrayInputLimitTy   = llvm::ArrayType::get(Int32Ty, RS_KERNEL_INPUT_LIMIT);
260     llvm::Type *VoidPtrTy                = llvm::Type::getInt8PtrTy(*Context);
261     llvm::Type *Int32Array4Ty            = llvm::ArrayType::get(Int32Ty, 4);
262 
263     /* Defined in frameworks/base/libs/rs/cpu_ref/rsCpuCore.h:
264      *
265      * struct RsLaunchDimensions {
266      *   uint32_t x;
267      *   uint32_t y;
268      *   uint32_t z;
269      *   uint32_t lod;
270      *   uint32_t face;
271      *   uint32_t array[4];
272      * };
273      */
274     llvm::SmallVector<llvm::Type*, RsLaunchDimensionsFieldCount> RsLaunchDimensionsTypes;
275     RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t x
276     RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t y
277     RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t z
278     RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t lod
279     RsLaunchDimensionsTypes.push_back(Int32Ty);       // uint32_t face
280     RsLaunchDimensionsTypes.push_back(Int32Array4Ty); // uint32_t array[4]
281     llvm::StructType *RsLaunchDimensionsTy =
282         llvm::StructType::create(RsLaunchDimensionsTypes, "RsLaunchDimensions");
283 
284     /* Defined as the beginning of RsExpandKernelDriverInfo in frameworks/base/libs/rs/cpu_ref/rsCpuCoreRuntime.h:
285      *
286      * struct RsExpandKernelDriverInfoPfx {
287      *     const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT];
288      *     uint32_t inStride[RS_KERNEL_INPUT_LIMIT];
289      *     uint32_t inLen;
290      *
291      *     uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT];
292      *     uint32_t outStride[RS_KERNEL_INPUT_LIMIT];
293      *     uint32_t outLen;
294      *
295      *     // Dimension of the launch
296      *     RsLaunchDimensions dim;
297      *
298      *     // The walking iterator of the launch
299      *     RsLaunchDimensions current;
300      *
301      *     const void *usr;
302      *     uint32_t usrLen;
303      *
304      *     // Items below this line are not used by the compiler and can be change in the driver.
305      *     // So the compiler must assume there are an unknown number of fields of unknown type
306      *     // beginning here.
307      * };
308      *
309      * The name "RsExpandKernelDriverInfoPfx" is known to RSInvariantPass (RSInvariant.cpp).
310      */
311     llvm::SmallVector<llvm::Type*, RsExpandKernelDriverInfoPfxFieldCount> RsExpandKernelDriverInfoPfxTypes;
312     RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // const uint8_t *inPtr[RS_KERNEL_INPUT_LIMIT]
313     RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t inStride[RS_KERNEL_INPUT_LIMIT]
314     RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t inLen
315     RsExpandKernelDriverInfoPfxTypes.push_back(Int8PtrArrayInputLimitTy); // uint8_t *outPtr[RS_KERNEL_INPUT_LIMIT]
316     RsExpandKernelDriverInfoPfxTypes.push_back(Int32ArrayInputLimitTy);   // uint32_t outStride[RS_KERNEL_INPUT_LIMIT]
317     RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t outLen
318     RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions dim
319     RsExpandKernelDriverInfoPfxTypes.push_back(RsLaunchDimensionsTy);     // RsLaunchDimensions current
320     RsExpandKernelDriverInfoPfxTypes.push_back(VoidPtrTy);                // const void *usr
321     RsExpandKernelDriverInfoPfxTypes.push_back(Int32Ty);                  // uint32_t usrLen
322     RsExpandKernelDriverInfoPfxTy =
323         llvm::StructType::create(RsExpandKernelDriverInfoPfxTypes, "RsExpandKernelDriverInfoPfx");
324 
325     // Create the function type for expanded kernels.
326     llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
327 
328     llvm::Type *RsExpandKernelDriverInfoPfxPtrTy = RsExpandKernelDriverInfoPfxTy->getPointerTo();
329     // void (const RsExpandKernelDriverInfoPfxTy *p, uint32_t x1, uint32_t x2, uint32_t outstep)
330     ExpandedForEachType = llvm::FunctionType::get(VoidTy,
331         {RsExpandKernelDriverInfoPfxPtrTy, Int32Ty, Int32Ty, Int32Ty}, false);
332   }
333 
334   /// @brief Create skeleton of the expanded foreach kernel.
335   ///
336   /// This creates a function with the following signature:
337   ///
338   ///   void (const RsForEachStubParamStruct *p, uint32_t x1, uint32_t x2,
339   ///         uint32_t outstep)
340   ///
createEmptyExpandedForEachKernel(llvm::StringRef OldName)341   llvm::Function *createEmptyExpandedForEachKernel(llvm::StringRef OldName) {
342     llvm::Function *ExpandedFunction =
343       llvm::Function::Create(ExpandedForEachType,
344                              llvm::GlobalValue::ExternalLinkage,
345                              OldName + ".expand", Module);
346     bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
347     llvm::Function::arg_iterator AI = ExpandedFunction->arg_begin();
348     (AI++)->setName("p");
349     (AI++)->setName("x1");
350     (AI++)->setName("x2");
351     (AI++)->setName("arg_outstep");
352     llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
353                                                        ExpandedFunction);
354     llvm::IRBuilder<> Builder(Begin);
355     Builder.CreateRetVoid();
356     return ExpandedFunction;
357   }
358 
359   // Create skeleton of a general reduce kernel's expanded accumulator.
360   //
361   // This creates a function with the following signature:
362   //
363   //  void @func.expand(%RsExpandKernelDriverInfoPfx* nocapture %p,
364   //                    i32 %x1, i32 %x2, accumType* nocapture %accum)
365   //
createEmptyExpandedReduceAccumulator(llvm::StringRef OldName,llvm::Type * AccumArgTy)366   llvm::Function *createEmptyExpandedReduceAccumulator(llvm::StringRef OldName,
367                                                        llvm::Type *AccumArgTy) {
368     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
369     llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
370     llvm::FunctionType *ExpandedReduceAccumulatorType =
371         llvm::FunctionType::get(VoidTy,
372                                 {RsExpandKernelDriverInfoPfxTy->getPointerTo(),
373                                  Int32Ty, Int32Ty, AccumArgTy}, false);
374     llvm::Function *FnExpandedAccumulator =
375       llvm::Function::Create(ExpandedReduceAccumulatorType,
376                              llvm::GlobalValue::ExternalLinkage,
377                              OldName + ".expand", Module);
378     bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
379 
380     llvm::Function::arg_iterator AI = FnExpandedAccumulator->arg_begin();
381 
382     using llvm::Attribute;
383 
384     llvm::Argument *Arg_p = &(*AI++);
385     Arg_p->setName("p");
386     Arg_p->addAttr(llvm::AttributeSet::get(*Context, Arg_p->getArgNo() + 1,
387                                            llvm::makeArrayRef(Attribute::NoCapture)));
388 
389     llvm::Argument *Arg_x1 = &(*AI++);
390     Arg_x1->setName("x1");
391 
392     llvm::Argument *Arg_x2 = &(*AI++);
393     Arg_x2->setName("x2");
394 
395     llvm::Argument *Arg_accum = &(*AI++);
396     Arg_accum->setName("accum");
397     Arg_accum->addAttr(llvm::AttributeSet::get(*Context, Arg_accum->getArgNo() + 1,
398                                                llvm::makeArrayRef(Attribute::NoCapture)));
399 
400     llvm::BasicBlock *Begin = llvm::BasicBlock::Create(*Context, "Begin",
401                                                        FnExpandedAccumulator);
402     llvm::IRBuilder<> Builder(Begin);
403     Builder.CreateRetVoid();
404 
405     return FnExpandedAccumulator;
406   }
407 
408   /// @brief Create an empty loop
409   ///
410   /// Create a loop of the form:
411   ///
412   /// for (i = LowerBound; i < UpperBound; i++)
413   ///   ;
414   ///
415   /// After the loop has been created, the builder is set such that
416   /// instructions can be added to the loop body.
417   ///
418   /// @param Builder The builder to use to build this loop. The current
419   ///                position of the builder is the position the loop
420   ///                will be inserted.
421   /// @param LowerBound The first value of the loop iterator
422   /// @param UpperBound The maximal value of the loop iterator
423   /// @param LoopIV A reference that will be set to the loop iterator.
424   /// @return The BasicBlock that will be executed after the loop.
createLoop(llvm::IRBuilder<> & Builder,llvm::Value * LowerBound,llvm::Value * UpperBound,llvm::Value ** LoopIV)425   llvm::BasicBlock *createLoop(llvm::IRBuilder<> &Builder,
426                                llvm::Value *LowerBound,
427                                llvm::Value *UpperBound,
428                                llvm::Value **LoopIV) {
429     bccAssert(LowerBound->getType() == UpperBound->getType());
430 
431     llvm::BasicBlock *CondBB, *AfterBB, *HeaderBB;
432     llvm::Value *Cond, *IVNext, *IV, *IVVar;
433 
434     CondBB = Builder.GetInsertBlock();
435     AfterBB = llvm::SplitBlock(CondBB, &*Builder.GetInsertPoint(), nullptr, nullptr);
436     HeaderBB = llvm::BasicBlock::Create(*Context, "Loop", CondBB->getParent());
437 
438     CondBB->getTerminator()->eraseFromParent();
439     Builder.SetInsertPoint(CondBB);
440 
441     // decltype(LowerBound) *ivvar = alloca(sizeof(int))
442     // *ivvar = LowerBound
443     IVVar = Builder.CreateAlloca(LowerBound->getType(), nullptr, BCC_INDEX_VAR_NAME);
444     Builder.CreateStore(LowerBound, IVVar);
445 
446     // if (LowerBound < Upperbound)
447     //   goto LoopHeader
448     // else
449     //   goto AfterBB
450     Cond = Builder.CreateICmpULT(LowerBound, UpperBound);
451     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
452 
453     // LoopHeader:
454     //   iv = *ivvar
455     //   <insertion point here>
456     //   iv.next = iv + 1
457     //   *ivvar = iv.next
458     //   if (iv.next < Upperbound)
459     //     goto LoopHeader
460     //   else
461     //     goto AfterBB
462     // AfterBB:
463     Builder.SetInsertPoint(HeaderBB);
464     IV = Builder.CreateLoad(IVVar, "X");
465     IVNext = Builder.CreateNUWAdd(IV, Builder.getInt32(1));
466     Builder.CreateStore(IVNext, IVVar);
467     Cond = Builder.CreateICmpULT(IVNext, UpperBound);
468     Builder.CreateCondBr(Cond, HeaderBB, AfterBB);
469     AfterBB->setName("Exit");
470     Builder.SetInsertPoint(llvm::cast<llvm::Instruction>(IVNext));
471 
472     // Record information about this loop.
473     *LoopIV = IV;
474     return AfterBB;
475   }
476 
477   // Finish building the outgoing argument list for calling a ForEach-able function.
478   //
479   // ArgVector - on input, the non-special arguments
480   //             on output, the non-special arguments combined with the special arguments
481   //               from SpecialArgVector
482   // SpecialArgVector - special arguments (from ExpandSpecialArguments())
483   // SpecialArgContextIdx - return value of ExpandSpecialArguments()
484   //                          (position of context argument in SpecialArgVector)
485   // CalleeFunction - the ForEach-able function being called
486   // Builder - for inserting code into the caller function
487   template<unsigned int ArgVectorLen, unsigned int SpecialArgVectorLen>
finishArgList(llvm::SmallVector<llvm::Value *,ArgVectorLen> & ArgVector,const llvm::SmallVector<llvm::Value *,SpecialArgVectorLen> & SpecialArgVector,const int SpecialArgContextIdx,const llvm::Function & CalleeFunction,llvm::IRBuilder<> & CallerBuilder)488   void finishArgList(      llvm::SmallVector<llvm::Value *, ArgVectorLen>        &ArgVector,
489                      const llvm::SmallVector<llvm::Value *, SpecialArgVectorLen> &SpecialArgVector,
490                      const int SpecialArgContextIdx,
491                      const llvm::Function &CalleeFunction,
492                      llvm::IRBuilder<> &CallerBuilder) {
493     /* The context argument (if any) is a pointer to an opaque user-visible type that differs from
494      * the RsExpandKernelDriverInfoPfx type used in the function we are generating (although the
495      * two types represent the same thing).  Therefore, we must introduce a pointer cast when
496      * generating a call to the kernel function.
497      */
498     const int ArgContextIdx =
499         SpecialArgContextIdx >= 0 ? (ArgVector.size() + SpecialArgContextIdx) : SpecialArgContextIdx;
500     ArgVector.append(SpecialArgVector.begin(), SpecialArgVector.end());
501     if (ArgContextIdx >= 0) {
502       llvm::Type *ContextArgType = nullptr;
503       int ArgIdx = ArgContextIdx;
504       for (const auto &Arg : CalleeFunction.getArgumentList()) {
505         if (!ArgIdx--) {
506           ContextArgType = Arg.getType();
507           break;
508         }
509       }
510       bccAssert(ContextArgType);
511       ArgVector[ArgContextIdx] = CallerBuilder.CreatePointerCast(ArgVector[ArgContextIdx], ContextArgType);
512     }
513   }
514 
515   // GEPHelper() returns a SmallVector of values suitable for passing
516   // to IRBuilder::CreateGEP(), and SmallGEPIndices is a typedef for
517   // the returned data type. It is sized so that the SmallVector
518   // returned by GEPHelper() never needs to do a heap allocation for
519   // any list of GEP indices it encounters in the code.
520   typedef llvm::SmallVector<llvm::Value *, 3> SmallGEPIndices;
521 
522   // Helper for turning a list of constant integer GEP indices into a
523   // SmallVector of llvm::Value*. The return value is suitable for
524   // passing to a GetElementPtrInst constructor or IRBuilder::CreateGEP().
525   //
526   // Inputs:
527   //   I32Args should be integers which represent the index arguments
528   //   to a GEP instruction.
529   //
530   // Returns:
531   //   Returns a SmallVector of ConstantInts.
GEPHelper(const std::initializer_list<int32_t> I32Args)532   SmallGEPIndices GEPHelper(const std::initializer_list<int32_t> I32Args) {
533     SmallGEPIndices Out(I32Args.size());
534     llvm::IntegerType *I32Ty = llvm::Type::getInt32Ty(*Context);
535     std::transform(I32Args.begin(), I32Args.end(), Out.begin(),
536                    [I32Ty](int32_t Arg) { return llvm::ConstantInt::get(I32Ty, Arg); });
537     return Out;
538   }
539 
540 public:
RSKernelExpandPass(bool pEnableStepOpt=true)541   explicit RSKernelExpandPass(bool pEnableStepOpt = true)
542       : ModulePass(ID), Module(nullptr), Context(nullptr),
543         mEnableStepOpt(pEnableStepOpt) {
544 
545   }
546 
getAnalysisUsage(llvm::AnalysisUsage & AU) const547   virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
548     // This pass does not use any other analysis passes, but it does
549     // add/wrap the existing functions in the module (thus altering the CFG).
550   }
551 
552   // Build contribution to outgoing argument list for calling a
553   // ForEach-able function or a general reduction accumulator
554   // function, based on the special parameters of that function.
555   //
556   // Signature - metadata bits for the signature of the callee
557   // X, Arg_p - values derived directly from expanded function,
558   //            suitable for computing arguments for the callee
559   // CalleeArgs - contribution is accumulated here
560   // Bump - invoked once for each contributed outgoing argument
561   // LoopHeaderInsertionPoint - an Instruction in the loop header, before which
562   //                            this function can insert loop-invariant loads
563   //
564   // Return value is the (zero-based) position of the context (Arg_p)
565   // argument in the CalleeArgs vector, or a negative value if the
566   // context argument is not placed in the CalleeArgs vector.
ExpandSpecialArguments(uint32_t Signature,llvm::Value * X,llvm::Value * Arg_p,llvm::IRBuilder<> & Builder,llvm::SmallVector<llvm::Value *,8> & CalleeArgs,const std::function<void ()> & Bump,llvm::Instruction * LoopHeaderInsertionPoint)567   int ExpandSpecialArguments(uint32_t Signature,
568                              llvm::Value *X,
569                              llvm::Value *Arg_p,
570                              llvm::IRBuilder<> &Builder,
571                              llvm::SmallVector<llvm::Value*, 8> &CalleeArgs,
572                              const std::function<void ()> &Bump,
573                              llvm::Instruction *LoopHeaderInsertionPoint) {
574 
575     bccAssert(CalleeArgs.empty());
576 
577     int Return = -1;
578     if (bcinfo::MetadataExtractor::hasForEachSignatureCtxt(Signature)) {
579       CalleeArgs.push_back(Arg_p);
580       Bump();
581       Return = CalleeArgs.size() - 1;
582     }
583 
584     if (bcinfo::MetadataExtractor::hasForEachSignatureX(Signature)) {
585       CalleeArgs.push_back(X);
586       Bump();
587     }
588 
589     if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature) ||
590         bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
591       bccAssert(LoopHeaderInsertionPoint);
592 
593       // Y and Z are loop invariant, so they can be hoisted out of the
594       // loop. Set the IRBuilder insertion point to the loop header.
595       auto OldInsertionPoint = Builder.saveIP();
596       Builder.SetInsertPoint(LoopHeaderInsertionPoint);
597 
598       if (bcinfo::MetadataExtractor::hasForEachSignatureY(Signature)) {
599         SmallGEPIndices YValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
600           RsLaunchDimensionsFieldY}));
601         llvm::Value *YAddr = Builder.CreateInBoundsGEP(Arg_p, YValueGEP, "Y.gep");
602         CalleeArgs.push_back(Builder.CreateLoad(YAddr, "Y"));
603         Bump();
604       }
605 
606       if (bcinfo::MetadataExtractor::hasForEachSignatureZ(Signature)) {
607         SmallGEPIndices ZValueGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldCurrent,
608           RsLaunchDimensionsFieldZ}));
609         llvm::Value *ZAddr = Builder.CreateInBoundsGEP(Arg_p, ZValueGEP, "Z.gep");
610         CalleeArgs.push_back(Builder.CreateLoad(ZAddr, "Z"));
611         Bump();
612       }
613 
614       Builder.restoreIP(OldInsertionPoint);
615     }
616 
617     return Return;
618   }
619 
620   // Generate loop-invariant input processing setup code for an expanded
621   // ForEach-able function or an expanded general reduction accumulator
622   // function.
623   //
624   // LoopHeader - block at the end of which the setup code will be inserted
625   // Arg_p - RSKernelDriverInfo pointer passed to the expanded function
626   // TBAAPointer - metadata for marking loads of pointer values out of RSKernelDriverInfo
627   // ArgIter - iterator pointing to first input of the UNexpanded function
628   // NumInputs - number of inputs (NOT number of ARGUMENTS)
629   //
630   // InTypes[] - this function saves input type, they will be used in ExpandInputsBody().
631   // InBufPtrs[] - this function sets each array element to point to the first cell / byte
632   //               (byte for x86, cell for other platforms) of the corresponding input allocation
633   // InStructTempSlots[] - this function sets each array element either to nullptr
634   //                       or to the result of an alloca (for the case where the
635   //                       calling convention dictates that a value must be passed
636   //                       by reference, and so we need a stacked temporary to hold
637   //                       a copy of that value)
ExpandInputsLoopInvariant(llvm::IRBuilder<> & Builder,llvm::BasicBlock * LoopHeader,llvm::Value * Arg_p,llvm::MDNode * TBAAPointer,llvm::Function::arg_iterator ArgIter,const size_t NumInputs,llvm::SmallVectorImpl<llvm::Type * > & InTypes,llvm::SmallVectorImpl<llvm::Value * > & InBufPtrs,llvm::SmallVectorImpl<llvm::Value * > & InStructTempSlots)638   void ExpandInputsLoopInvariant(llvm::IRBuilder<> &Builder, llvm::BasicBlock *LoopHeader,
639                                  llvm::Value *Arg_p,
640                                  llvm::MDNode *TBAAPointer,
641                                  llvm::Function::arg_iterator ArgIter,
642                                  const size_t NumInputs,
643                                  llvm::SmallVectorImpl<llvm::Type *> &InTypes,
644                                  llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
645                                  llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots) {
646     bccAssert(NumInputs <= RS_KERNEL_INPUT_LIMIT);
647 
648     // Extract information about input slots. The work done
649     // here is loop-invariant, so we can hoist the operations out of the loop.
650     auto OldInsertionPoint = Builder.saveIP();
651     Builder.SetInsertPoint(LoopHeader->getTerminator());
652 
653     for (size_t InputIndex = 0; InputIndex < NumInputs; ++InputIndex, ArgIter++) {
654       llvm::Type *InType = ArgIter->getType();
655 
656       /*
657        * AArch64 calling conventions dictate that structs of sufficient size
658        * get passed by pointer instead of passed by value.  This, combined
659        * with the fact that we don't allow kernels to operate on pointer
660        * data means that if we see a kernel with a pointer parameter we know
661        * that it is a struct input that has been promoted.  As such we don't
662        * need to convert its type to a pointer.  Later we will need to know
663        * to create a temporary copy on the stack, so we save this information
664        * in InStructTempSlots.
665        */
666       if (auto PtrType = llvm::dyn_cast<llvm::PointerType>(InType)) {
667         llvm::Type *ElementType = PtrType->getElementType();
668         InStructTempSlots.push_back(Builder.CreateAlloca(ElementType, nullptr,
669                                                          "input_struct_slot"));
670       } else {
671         InType = InType->getPointerTo();
672         InStructTempSlots.push_back(nullptr);
673       }
674 
675       SmallGEPIndices InBufPtrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr,
676                                              static_cast<int32_t>(InputIndex)}));
677       llvm::Value    *InBufPtrAddr = Builder.CreateInBoundsGEP(Arg_p, InBufPtrGEP, "input_buf.gep");
678       llvm::LoadInst *InBufPtr = Builder.CreateLoad(InBufPtrAddr, "input_buf");
679 
680       llvm::Value *CastInBufPtr = nullptr;
681       if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
682         CastInBufPtr = Builder.CreatePointerCast(InBufPtr, InType, "casted_in");
683       } else {
684         // The disagreement between module and x86 target machine datalayout
685         // causes mismatched input/output data offset between slang reflected
686         // code and bcc codegen for GetElementPtr. To solve this issue, skip the
687         // cast to InType and leave CastInBufPtr as an int8_t*.  The buffer is
688         // later indexed with an explicit byte offset computed based on
689         // X86_CUSTOM_DL_STRING and then bitcast to actual input type.
690         CastInBufPtr = InBufPtr;
691       }
692 
693       if (gEnableRsTbaa) {
694         InBufPtr->setMetadata("tbaa", TBAAPointer);
695       }
696 
697       InTypes.push_back(InType);
698       InBufPtrs.push_back(CastInBufPtr);
699     }
700 
701     Builder.restoreIP(OldInsertionPoint);
702   }
703 
704   // Generate loop-varying input processing code for an expanded ForEach-able function
705   // or an expanded general reduction accumulator function.  Also, for the call to the
706   // UNexpanded function, collect the portion of the argument list corresponding to the
707   // inputs.
708   //
709   // Arg_x1 - first X coordinate to be processed by the expanded function
710   // TBAAAllocation - metadata for marking loads of input values out of allocations
711   // NumInputs -- number of inputs (NOT number of ARGUMENTS)
712   // InTypes[] - this function uses the saved input types in ExpandInputsLoopInvariant()
713   //             to convert the pointer of byte InPtr to its real type.
714   // InBufPtrs[] - this function consumes the information produced by ExpandInputsLoopInvariant()
715   // InStructTempSlots[] - this function consumes the information produced by ExpandInputsLoopInvariant()
716   // IndVar - value of loop induction variable (X coordinate) for a given loop iteration
717   //
718   // RootArgs - this function sets this to the list of outgoing argument values corresponding
719   //            to the inputs
ExpandInputsBody(llvm::IRBuilder<> & Builder,llvm::Value * Arg_x1,llvm::MDNode * TBAAAllocation,const size_t NumInputs,const llvm::SmallVectorImpl<llvm::Type * > & InTypes,const llvm::SmallVectorImpl<llvm::Value * > & InBufPtrs,const llvm::SmallVectorImpl<llvm::Value * > & InStructTempSlots,llvm::Value * IndVar,llvm::SmallVectorImpl<llvm::Value * > & RootArgs)720   void ExpandInputsBody(llvm::IRBuilder<> &Builder,
721                         llvm::Value *Arg_x1,
722                         llvm::MDNode *TBAAAllocation,
723                         const size_t NumInputs,
724                         const llvm::SmallVectorImpl<llvm::Type *> &InTypes,
725                         const llvm::SmallVectorImpl<llvm::Value *> &InBufPtrs,
726                         const llvm::SmallVectorImpl<llvm::Value *> &InStructTempSlots,
727                         llvm::Value *IndVar,
728                         llvm::SmallVectorImpl<llvm::Value *> &RootArgs) {
729     llvm::Value *Offset = Builder.CreateSub(IndVar, Arg_x1);
730     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
731 
732     for (size_t Index = 0; Index < NumInputs; ++Index) {
733 
734       llvm::Value *InPtr = nullptr;
735       if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
736         InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], Offset);
737       } else {
738         // Treat x86 input buffer as byte[], get indexed pointer with explicit
739         // byte offset computed using a datalayout based on
740         // X86_CUSTOM_DL_STRING, then bitcast it to actual input type.
741         llvm::DataLayout DL(X86_CUSTOM_DL_STRING);
742         llvm::Type *InTy = InTypes[Index];
743         uint64_t InStep = DL.getTypeAllocSize(InTy->getPointerElementType());
744         llvm::Value *OffsetInBytes = Builder.CreateMul(Offset, llvm::ConstantInt::get(Int32Ty, InStep));
745         InPtr = Builder.CreateInBoundsGEP(InBufPtrs[Index], OffsetInBytes);
746         InPtr = Builder.CreatePointerCast(InPtr, InTy);
747       }
748 
749       llvm::Value *Input;
750       llvm::LoadInst *InputLoad = Builder.CreateLoad(InPtr, "input");
751 
752       if (gEnableRsTbaa) {
753         InputLoad->setMetadata("tbaa", TBAAAllocation);
754       }
755 
756       if (llvm::Value *TemporarySlot = InStructTempSlots[Index]) {
757         // Pass a pointer to a temporary on the stack, rather than
758         // passing a pointer to the original value. We do not want
759         // the kernel to potentially modify the input data.
760 
761         // Note: don't annotate with TBAA, since the kernel might
762         // have its own TBAA annotations for the pointer argument.
763         Builder.CreateStore(InputLoad, TemporarySlot);
764         Input = TemporarySlot;
765       } else {
766         Input = InputLoad;
767       }
768 
769       RootArgs.push_back(Input);
770     }
771   }
772 
773   /* Performs the actual optimization on a selected function. On success, the
774    * Module will contain a new function of the name "<NAME>.expand" that
775    * invokes <NAME>() in a loop with the appropriate parameters.
776    */
ExpandOldStyleForEach(llvm::Function * Function,uint32_t Signature)777   bool ExpandOldStyleForEach(llvm::Function *Function, uint32_t Signature) {
778     ALOGV("Expanding ForEach-able Function %s",
779           Function->getName().str().c_str());
780 
781     if (!Signature) {
782       Signature = getRootSignature(Function);
783       if (!Signature) {
784         // We couldn't determine how to expand this function based on its
785         // function signature.
786         return false;
787       }
788     }
789 
790     llvm::DataLayout DL(Module);
791     if (!mStructExplicitlyPaddedBySlang && (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING)) {
792       DL.reset(X86_CUSTOM_DL_STRING);
793     }
794 
795     llvm::Function *ExpandedFunction =
796       createEmptyExpandedForEachKernel(Function->getName());
797 
798     /*
799      * Extract the expanded function's parameters.  It is guaranteed by
800      * createEmptyExpandedForEachKernel that there will be four parameters.
801      */
802 
803     bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
804 
805     llvm::Function::arg_iterator ExpandedFunctionArgIter =
806       ExpandedFunction->arg_begin();
807 
808     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
809     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
810     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
811     llvm::Value *Arg_outstep = &*(ExpandedFunctionArgIter);
812 
813     llvm::Value *InStep  = nullptr;
814     llvm::Value *OutStep = nullptr;
815 
816     // Construct the actual function body.
817     llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
818 
819     // Collect and construct the arguments for the kernel().
820     // Note that we load any loop-invariant arguments before entering the Loop.
821     llvm::Function::arg_iterator FunctionArgIter = Function->arg_begin();
822 
823     llvm::Type  *InTy      = nullptr;
824     llvm::Value *InBufPtr = nullptr;
825     if (bcinfo::MetadataExtractor::hasForEachSignatureIn(Signature)) {
826       SmallGEPIndices InStepGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInStride, 0}));
827       llvm::LoadInst *InStepArg  = Builder.CreateLoad(
828         Builder.CreateInBoundsGEP(Arg_p, InStepGEP, "instep_addr.gep"), "instep_addr");
829 
830       InTy = (FunctionArgIter++)->getType();
831       InStep = getStepValue(&DL, InTy, InStepArg);
832 
833       InStep->setName("instep");
834 
835       SmallGEPIndices InputAddrGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldInPtr, 0}));
836       InBufPtr = Builder.CreateLoad(
837         Builder.CreateInBoundsGEP(Arg_p, InputAddrGEP, "input_buf.gep"), "input_buf");
838     }
839 
840     llvm::Type *OutTy = nullptr;
841     llvm::Value *OutBasePtr = nullptr;
842     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
843       OutTy = (FunctionArgIter++)->getType();
844       OutStep = getStepValue(&DL, OutTy, Arg_outstep);
845       OutStep->setName("outstep");
846       SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
847       OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
848     }
849 
850     llvm::Value *UsrData = nullptr;
851     if (bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature)) {
852       llvm::Type *UsrDataTy = (FunctionArgIter++)->getType();
853       llvm::Value *UsrDataPointerAddr = Builder.CreateStructGEP(nullptr, Arg_p, RsExpandKernelDriverInfoPfxFieldUsr);
854       UsrData = Builder.CreatePointerCast(Builder.CreateLoad(UsrDataPointerAddr), UsrDataTy);
855       UsrData->setName("UsrData");
856     }
857 
858     llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
859     llvm::Value *IV;
860     createLoop(Builder, Arg_x1, Arg_x2, &IV);
861 
862     llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
863     const int CalleeArgsContextIdx = ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
864                                                             [&FunctionArgIter]() { FunctionArgIter++; },
865                                                             LoopHeader->getTerminator());
866 
867     bccAssert(FunctionArgIter == Function->arg_end());
868 
869     // Populate the actual call to kernel().
870     llvm::SmallVector<llvm::Value*, 8> RootArgs;
871 
872     llvm::Value *InPtr  = nullptr;
873     llvm::Value *OutPtr = nullptr;
874 
875     // Calculate the current input and output pointers
876     //
877     // We always calculate the input/output pointers with a GEP operating on i8
878     // values and only cast at the very end to OutTy. This is because the step
879     // between two values is given in bytes.
880     //
881     // TODO: We could further optimize the output by using a GEP operation of
882     // type 'OutTy' in cases where the element type of the allocation allows.
883     if (OutBasePtr) {
884       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
885       OutOffset = Builder.CreateMul(OutOffset, OutStep);
886       OutPtr = Builder.CreateInBoundsGEP(OutBasePtr, OutOffset);
887       OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
888     }
889 
890     if (InBufPtr) {
891       llvm::Value *InOffset = Builder.CreateSub(IV, Arg_x1);
892       InOffset = Builder.CreateMul(InOffset, InStep);
893       InPtr = Builder.CreateInBoundsGEP(InBufPtr, InOffset);
894       InPtr = Builder.CreatePointerCast(InPtr, InTy);
895     }
896 
897     if (InPtr) {
898       RootArgs.push_back(InPtr);
899     }
900 
901     if (OutPtr) {
902       RootArgs.push_back(OutPtr);
903     }
904 
905     if (UsrData) {
906       RootArgs.push_back(UsrData);
907     }
908 
909     finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
910 
911     Builder.CreateCall(Function, RootArgs);
912 
913     return true;
914   }
915 
916   /* Expand a pass-by-value foreach kernel.
917    */
ExpandForEach(llvm::Function * Function,uint32_t Signature)918   bool ExpandForEach(llvm::Function *Function, uint32_t Signature) {
919     bccAssert(bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature));
920     ALOGV("Expanding kernel Function %s", Function->getName().str().c_str());
921 
922     // TODO: Refactor this to share functionality with ExpandOldStyleForEach.
923     llvm::DataLayout DL(Module);
924     if (!mStructExplicitlyPaddedBySlang && (Module->getTargetTriple() == DEFAULT_X86_TRIPLE_STRING)) {
925       DL.reset(X86_CUSTOM_DL_STRING);
926     }
927     llvm::Type *Int32Ty = llvm::Type::getInt32Ty(*Context);
928 
929     llvm::Function *ExpandedFunction =
930       createEmptyExpandedForEachKernel(Function->getName());
931 
932     /*
933      * Extract the expanded function's parameters.  It is guaranteed by
934      * createEmptyExpandedForEachKernel that there will be four parameters.
935      */
936 
937     bccAssert(ExpandedFunction->arg_size() == kNumExpandedForeachParams);
938 
939     llvm::Function::arg_iterator ExpandedFunctionArgIter =
940       ExpandedFunction->arg_begin();
941 
942     llvm::Value *Arg_p       = &*(ExpandedFunctionArgIter++);
943     llvm::Value *Arg_x1      = &*(ExpandedFunctionArgIter++);
944     llvm::Value *Arg_x2      = &*(ExpandedFunctionArgIter++);
945     // Arg_outstep is not used by expanded new-style forEach kernels.
946 
947     // Construct the actual function body.
948     llvm::IRBuilder<> Builder(&*ExpandedFunction->getEntryBlock().begin());
949 
950     // Create TBAA meta-data.
951     llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
952                  *TBAAAllocation, *TBAAPointer;
953     llvm::MDBuilder MDHelper(*Context);
954 
955     TBAARenderScriptDistinct =
956       MDHelper.createTBAARoot(kRenderScriptTBAARootName);
957     TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
958         TBAARenderScriptDistinct);
959     TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
960                                                        TBAARenderScript);
961     TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
962                                                       TBAAAllocation, 0);
963     TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
964                                                     TBAARenderScript);
965     TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
966 
967     /*
968      * Collect and construct the arguments for the kernel().
969      *
970      * Note that we load any loop-invariant arguments before entering the Loop.
971      */
972     size_t NumRemainingInputs = Function->arg_size();
973 
974     // No usrData parameter on kernels.
975     bccAssert(
976         !bcinfo::MetadataExtractor::hasForEachSignatureUsrData(Signature));
977 
978     llvm::Function::arg_iterator ArgIter = Function->arg_begin();
979 
980     // Check the return type
981     llvm::Type     *OutTy            = nullptr;
982     llvm::LoadInst *OutBasePtr       = nullptr;
983     llvm::Value    *CastedOutBasePtr = nullptr;
984 
985     bool PassOutByPointer = false;
986 
987     if (bcinfo::MetadataExtractor::hasForEachSignatureOut(Signature)) {
988       llvm::Type *OutBaseTy = Function->getReturnType();
989 
990       if (OutBaseTy->isVoidTy()) {
991         PassOutByPointer = true;
992         OutTy = ArgIter->getType();
993 
994         ArgIter++;
995         --NumRemainingInputs;
996       } else {
997         // We don't increment Args, since we are using the actual return type.
998         OutTy = OutBaseTy->getPointerTo();
999       }
1000 
1001       SmallGEPIndices OutBaseGEP(GEPHelper({0, RsExpandKernelDriverInfoPfxFieldOutPtr, 0}));
1002       OutBasePtr = Builder.CreateLoad(Builder.CreateInBoundsGEP(Arg_p, OutBaseGEP, "out_buf.gep"));
1003 
1004       if (gEnableRsTbaa) {
1005         OutBasePtr->setMetadata("tbaa", TBAAPointer);
1006       }
1007 
1008       if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
1009         CastedOutBasePtr = Builder.CreatePointerCast(OutBasePtr, OutTy, "casted_out");
1010       } else {
1011         // The disagreement between module and x86 target machine datalayout
1012         // causes mismatched input/output data offset between slang reflected
1013         // code and bcc codegen for GetElementPtr. To solve this issue, skip the
1014         // cast to OutTy and leave CastedOutBasePtr as an int8_t*.  The buffer
1015         // is later indexed with an explicit byte offset computed based on
1016         // X86_CUSTOM_DL_STRING and then bitcast to actual output type.
1017         CastedOutBasePtr = OutBasePtr;
1018       }
1019     }
1020 
1021     llvm::SmallVector<llvm::Type*,  8> InTypes;
1022     llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1023     llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1024 
1025     bccAssert(NumRemainingInputs <= RS_KERNEL_INPUT_LIMIT);
1026 
1027     // Create the loop structure.
1028     llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
1029     llvm::Value *IV;
1030     createLoop(Builder, Arg_x1, Arg_x2, &IV);
1031 
1032     llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1033     const int CalleeArgsContextIdx =
1034       ExpandSpecialArguments(Signature, IV, Arg_p, Builder, CalleeArgs,
1035                              [&NumRemainingInputs]() { --NumRemainingInputs; },
1036                              LoopHeader->getTerminator());
1037 
1038     // After ExpandSpecialArguments() gets called, NumRemainingInputs
1039     // counts the number of arguments to the kernel that correspond to
1040     // an array entry from the InPtr field of the DriverInfo
1041     // structure.
1042     const size_t NumInPtrArguments = NumRemainingInputs;
1043 
1044     if (NumInPtrArguments > 0) {
1045       ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, ArgIter, NumInPtrArguments,
1046                                 InTypes, InBufPtrs, InStructTempSlots);
1047     }
1048 
1049     // Populate the actual call to kernel().
1050     llvm::SmallVector<llvm::Value*, 8> RootArgs;
1051 
1052     // Calculate the current input and output pointers.
1053 
1054     // Output
1055 
1056     llvm::Value *OutPtr = nullptr;
1057     if (CastedOutBasePtr) {
1058       llvm::Value *OutOffset = Builder.CreateSub(IV, Arg_x1);
1059 
1060       if (mStructExplicitlyPaddedBySlang || (Module->getTargetTriple() != DEFAULT_X86_TRIPLE_STRING)) {
1061         OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffset);
1062       } else {
1063         // Treat x86 output buffer as byte[], get indexed pointer with explicit
1064         // byte offset computed using a datalayout based on
1065         // X86_CUSTOM_DL_STRING, then bitcast it to actual output type.
1066         uint64_t OutStep = DL.getTypeAllocSize(OutTy->getPointerElementType());
1067         llvm::Value *OutOffsetInBytes = Builder.CreateMul(OutOffset, llvm::ConstantInt::get(Int32Ty, OutStep));
1068         OutPtr = Builder.CreateInBoundsGEP(CastedOutBasePtr, OutOffsetInBytes);
1069         OutPtr = Builder.CreatePointerCast(OutPtr, OutTy);
1070       }
1071 
1072       if (PassOutByPointer) {
1073         RootArgs.push_back(OutPtr);
1074       }
1075     }
1076 
1077     // Inputs
1078 
1079     if (NumInPtrArguments > 0) {
1080       ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInPtrArguments,
1081                        InTypes, InBufPtrs, InStructTempSlots, IV, RootArgs);
1082     }
1083 
1084     finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *Function, Builder);
1085 
1086     llvm::Value *RetVal = Builder.CreateCall(Function, RootArgs);
1087 
1088     if (OutPtr && !PassOutByPointer) {
1089       RetVal->setName("call.result");
1090       llvm::StoreInst *Store = Builder.CreateStore(RetVal, OutPtr);
1091       if (gEnableRsTbaa) {
1092         Store->setMetadata("tbaa", TBAAAllocation);
1093       }
1094     }
1095 
1096     return true;
1097   }
1098 
1099   // Certain categories of functions that make up a general
1100   // reduce-style kernel are called directly from the driver with no
1101   // expansion needed.  For a function in such a category, we need to
1102   // promote linkage from static to external, to ensure that the
1103   // function is visible to the driver in the dynamic symbol table.
1104   // This promotion is safe because we don't have any kind of cross
1105   // translation unit linkage model (except for linking against
1106   // RenderScript libraries), so we do not risk name clashes.
PromoteReduceFunction(const char * Name,FunctionSet & PromotedFunctions)1107   bool PromoteReduceFunction(const char *Name, FunctionSet &PromotedFunctions) {
1108     if (!Name)  // a presumably-optional function that is not present
1109       return false;
1110 
1111     llvm::Function *Fn = Module->getFunction(Name);
1112     bccAssert(Fn != nullptr);
1113     if (PromotedFunctions.insert(Fn).second) {
1114       bccAssert(Fn->getLinkage() == llvm::GlobalValue::InternalLinkage);
1115       Fn->setLinkage(llvm::GlobalValue::ExternalLinkage);
1116       return true;
1117     }
1118 
1119     return false;
1120   }
1121 
1122   // Expand the accumulator function for a general reduce-style kernel.
1123   //
1124   // The input is a function of the form
1125   //
1126   //   define void @func(accumType* %accum, foo1 in1[, ... fooN inN] [, special arguments])
1127   //
1128   // where all arguments except the first are the same as for a foreach kernel.
1129   //
1130   // The input accumulator function gets expanded into a function of the form
1131   //
1132   //   define void @func.expand(%RsExpandKernelDriverInfoPfx* %p, i32 %x1, i32 %x2, accumType* %accum)
1133   //
1134   // which performs a serial accumulaion of elements [x1, x2) into *%accum.
1135   //
1136   // In pseudocode, @func.expand does:
1137   //
1138   //   for (i = %x1; i < %x2; ++i) {
1139   //     func(%accum,
1140   //          *((foo1 *)p->inPtr[0] + i)[, ... *((fooN *)p->inPtr[N-1] + i)
1141   //          [, p] [, i] [, p->current.y] [, p->current.z]);
1142   //   }
1143   //
1144   // This is very similar to foreach kernel expansion with no output.
ExpandReduceAccumulator(llvm::Function * FnAccumulator,uint32_t Signature,size_t NumInputs)1145   bool ExpandReduceAccumulator(llvm::Function *FnAccumulator, uint32_t Signature, size_t NumInputs) {
1146     ALOGV("Expanding accumulator %s for general reduce kernel",
1147           FnAccumulator->getName().str().c_str());
1148 
1149     // Create TBAA meta-data.
1150     llvm::MDNode *TBAARenderScriptDistinct, *TBAARenderScript,
1151                  *TBAAAllocation, *TBAAPointer;
1152     llvm::MDBuilder MDHelper(*Context);
1153     TBAARenderScriptDistinct =
1154       MDHelper.createTBAARoot(kRenderScriptTBAARootName);
1155     TBAARenderScript = MDHelper.createTBAANode(kRenderScriptTBAANodeName,
1156         TBAARenderScriptDistinct);
1157     TBAAAllocation = MDHelper.createTBAAScalarTypeNode("allocation",
1158                                                        TBAARenderScript);
1159     TBAAAllocation = MDHelper.createTBAAStructTagNode(TBAAAllocation,
1160                                                       TBAAAllocation, 0);
1161     TBAAPointer = MDHelper.createTBAAScalarTypeNode("pointer",
1162                                                     TBAARenderScript);
1163     TBAAPointer = MDHelper.createTBAAStructTagNode(TBAAPointer, TBAAPointer, 0);
1164 
1165     auto AccumulatorArgIter = FnAccumulator->arg_begin();
1166 
1167     // Create empty accumulator function.
1168     llvm::Function *FnExpandedAccumulator =
1169         createEmptyExpandedReduceAccumulator(FnAccumulator->getName(),
1170                                              (AccumulatorArgIter++)->getType());
1171 
1172     // Extract the expanded accumulator's parameters.  It is
1173     // guaranteed by createEmptyExpandedReduceAccumulator that
1174     // there will be 4 parameters.
1175     bccAssert(FnExpandedAccumulator->arg_size() == kNumExpandedReduceAccumulatorParams);
1176     auto ExpandedAccumulatorArgIter = FnExpandedAccumulator->arg_begin();
1177     llvm::Value *Arg_p     = &*(ExpandedAccumulatorArgIter++);
1178     llvm::Value *Arg_x1    = &*(ExpandedAccumulatorArgIter++);
1179     llvm::Value *Arg_x2    = &*(ExpandedAccumulatorArgIter++);
1180     llvm::Value *Arg_accum = &*(ExpandedAccumulatorArgIter++);
1181 
1182     // Construct the actual function body.
1183     llvm::IRBuilder<> Builder(&*FnExpandedAccumulator->getEntryBlock().begin());
1184 
1185     // Create the loop structure.
1186     llvm::BasicBlock *LoopHeader = Builder.GetInsertBlock();
1187     llvm::Value *IndVar;
1188     createLoop(Builder, Arg_x1, Arg_x2, &IndVar);
1189 
1190     llvm::SmallVector<llvm::Value*, 8> CalleeArgs;
1191     const int CalleeArgsContextIdx =
1192         ExpandSpecialArguments(Signature, IndVar, Arg_p, Builder, CalleeArgs,
1193                                [](){}, LoopHeader->getTerminator());
1194 
1195     llvm::SmallVector<llvm::Type*,  8> InTypes;
1196     llvm::SmallVector<llvm::Value*, 8> InBufPtrs;
1197     llvm::SmallVector<llvm::Value*, 8> InStructTempSlots;
1198     ExpandInputsLoopInvariant(Builder, LoopHeader, Arg_p, TBAAPointer, AccumulatorArgIter, NumInputs,
1199                               InTypes, InBufPtrs, InStructTempSlots);
1200 
1201     // Populate the actual call to the original accumulator.
1202     llvm::SmallVector<llvm::Value*, 8> RootArgs;
1203     RootArgs.push_back(Arg_accum);
1204     ExpandInputsBody(Builder, Arg_x1, TBAAAllocation, NumInputs, InTypes, InBufPtrs, InStructTempSlots,
1205                      IndVar, RootArgs);
1206     finishArgList(RootArgs, CalleeArgs, CalleeArgsContextIdx, *FnAccumulator, Builder);
1207     Builder.CreateCall(FnAccumulator, RootArgs);
1208 
1209     return true;
1210   }
1211 
1212   // Create a combiner function for a general reduce-style kernel that lacks one,
1213   // by calling the accumulator function.
1214   //
1215   // The accumulator function must be of the form
1216   //
1217   //   define void @accumFn(accumType* %accum, accumType %in)
1218   //
1219   // A combiner function will be generated of the form
1220   //
1221   //   define void @accumFn.combiner(accumType* %accum, accumType* %other) {
1222   //     %1 = load accumType, accumType* %other
1223   //     call void @accumFn(accumType* %accum, accumType %1);
1224   //   }
CreateReduceCombinerFromAccumulator(llvm::Function * FnAccumulator)1225   bool CreateReduceCombinerFromAccumulator(llvm::Function *FnAccumulator) {
1226     ALOGV("Creating combiner from accumulator %s for general reduce kernel",
1227           FnAccumulator->getName().str().c_str());
1228 
1229     using llvm::Attribute;
1230 
1231     bccAssert(FnAccumulator->arg_size() == 2);
1232     auto AccumulatorArgIter = FnAccumulator->arg_begin();
1233     llvm::Value *AccumulatorArg_accum = &*(AccumulatorArgIter++);
1234     llvm::Value *AccumulatorArg_in    = &*(AccumulatorArgIter++);
1235     llvm::Type *AccumulatorArgType = AccumulatorArg_accum->getType();
1236     bccAssert(AccumulatorArgType->isPointerTy());
1237 
1238     llvm::Type *VoidTy = llvm::Type::getVoidTy(*Context);
1239     llvm::FunctionType *CombinerType =
1240         llvm::FunctionType::get(VoidTy, { AccumulatorArgType, AccumulatorArgType }, false);
1241     llvm::Function *FnCombiner =
1242         llvm::Function::Create(CombinerType, llvm::GlobalValue::ExternalLinkage,
1243                                nameReduceCombinerFromAccumulator(FnAccumulator->getName()),
1244                                Module);
1245 
1246     auto CombinerArgIter = FnCombiner->arg_begin();
1247 
1248     llvm::Argument *CombinerArg_accum = &(*CombinerArgIter++);
1249     CombinerArg_accum->setName("accum");
1250     CombinerArg_accum->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_accum->getArgNo() + 1,
1251                                                        llvm::makeArrayRef(Attribute::NoCapture)));
1252 
1253     llvm::Argument *CombinerArg_other = &(*CombinerArgIter++);
1254     CombinerArg_other->setName("other");
1255     CombinerArg_other->addAttr(llvm::AttributeSet::get(*Context, CombinerArg_other->getArgNo() + 1,
1256                                                        llvm::makeArrayRef(Attribute::NoCapture)));
1257 
1258     llvm::BasicBlock *BB = llvm::BasicBlock::Create(*Context, "BB", FnCombiner);
1259     llvm::IRBuilder<> Builder(BB);
1260 
1261     if (AccumulatorArg_in->getType()->isPointerTy()) {
1262       // Types of sufficient size get passed by pointer-to-copy rather
1263       // than passed by value.  An accumulator cannot take a pointer
1264       // at the user level; so if we see a pointer here, we know that
1265       // we have a pass-by-pointer-to-copy case.
1266       llvm::Type *ElementType = AccumulatorArg_in->getType()->getPointerElementType();
1267       llvm::Value *TempMem = Builder.CreateAlloca(ElementType, nullptr, "caller_copy");
1268       Builder.CreateStore(Builder.CreateLoad(CombinerArg_other), TempMem);
1269       Builder.CreateCall(FnAccumulator, { CombinerArg_accum, TempMem });
1270     } else {
1271       llvm::Value *TypeAdjustedOther = CombinerArg_other;
1272       if (AccumulatorArgType->getPointerElementType() != AccumulatorArg_in->getType()) {
1273         // Call lowering by frontend has done some type coercion
1274         TypeAdjustedOther = Builder.CreatePointerCast(CombinerArg_other,
1275                                                       AccumulatorArg_in->getType()->getPointerTo(),
1276                                                       "cast");
1277       }
1278       llvm::Value *DerefOther = Builder.CreateLoad(TypeAdjustedOther);
1279       Builder.CreateCall(FnAccumulator, { CombinerArg_accum, DerefOther });
1280     }
1281     Builder.CreateRetVoid();
1282 
1283     return true;
1284   }
1285 
1286   /// @brief Checks if pointers to allocation internals are exposed
1287   ///
1288   /// This function verifies if through the parameters passed to the kernel
1289   /// or through calls to the runtime library the script gains access to
1290   /// pointers pointing to data within a RenderScript Allocation.
1291   /// If we know we control all loads from and stores to data within
1292   /// RenderScript allocations and if we know the run-time internal accesses
1293   /// are all annotated with RenderScript TBAA metadata, only then we
1294   /// can safely use TBAA to distinguish between generic and from-allocation
1295   /// pointers.
allocPointersExposed(llvm::Module & Module)1296   bool allocPointersExposed(llvm::Module &Module) {
1297     // Old style kernel function can expose pointers to elements within
1298     // allocations.
1299     // TODO: Extend analysis to allow simple cases of old-style kernels.
1300     for (size_t i = 0; i < mExportForEachCount; ++i) {
1301       const char *Name = mExportForEachNameList[i];
1302       uint32_t Signature = mExportForEachSignatureList[i];
1303       if (Module.getFunction(Name) &&
1304           !bcinfo::MetadataExtractor::hasForEachSignatureKernel(Signature)) {
1305         return true;
1306       }
1307     }
1308 
1309     // Check for library functions that expose a pointer to an Allocation or
1310     // that are not yet annotated with RenderScript-specific tbaa information.
1311     static const std::vector<const char *> Funcs{
1312       // rsGetElementAt(...)
1313       "_Z14rsGetElementAt13rs_allocationj",
1314       "_Z14rsGetElementAt13rs_allocationjj",
1315       "_Z14rsGetElementAt13rs_allocationjjj",
1316 
1317       // rsSetElementAt()
1318       "_Z14rsSetElementAt13rs_allocationPvj",
1319       "_Z14rsSetElementAt13rs_allocationPvjj",
1320       "_Z14rsSetElementAt13rs_allocationPvjjj",
1321 
1322       // rsGetElementAtYuv_uchar_Y()
1323       "_Z25rsGetElementAtYuv_uchar_Y13rs_allocationjj",
1324 
1325       // rsGetElementAtYuv_uchar_U()
1326       "_Z25rsGetElementAtYuv_uchar_U13rs_allocationjj",
1327 
1328       // rsGetElementAtYuv_uchar_V()
1329       "_Z25rsGetElementAtYuv_uchar_V13rs_allocationjj",
1330     };
1331 
1332     for (auto FI : Funcs) {
1333       llvm::Function *Function = Module.getFunction(FI);
1334 
1335       if (!Function) {
1336         ALOGE("Missing run-time function '%s'", FI);
1337         return true;
1338       }
1339 
1340       if (Function->getNumUses() > 0) {
1341         return true;
1342       }
1343     }
1344 
1345     return false;
1346   }
1347 
  /// @brief Connect RenderScript TBAA metadata to C/C++ metadata
  ///
  /// The TBAA metadata used to annotate loads/stores from RenderScript
  /// Allocations is generated in a separate TBAA tree with a
  /// "RenderScript Distinct TBAA" root node. LLVM assumes may-alias for
  /// all nodes in unrelated alias analysis trees. This function makes the
  /// "RenderScript TBAA" node (which is parented by the Distinct TBAA root)
  /// a subtree of the normal C/C++ TBAA tree, alongside the normal C/C++
  /// types. With the trees connected, every access to an Allocation is
  /// resolved to no-alias when compared to a normal C/C++ access.
connectRenderScriptTBAAMetadata(llvm::Module & Module)1358   void connectRenderScriptTBAAMetadata(llvm::Module &Module) {
1359     llvm::MDBuilder MDHelper(*Context);
1360     llvm::MDNode *TBAARenderScriptDistinct =
1361       MDHelper.createTBAARoot("RenderScript Distinct TBAA");
1362     llvm::MDNode *TBAARenderScript = MDHelper.createTBAANode(
1363         "RenderScript TBAA", TBAARenderScriptDistinct);
1364     llvm::MDNode *TBAARoot     = MDHelper.createTBAARoot("Simple C/C++ TBAA");
1365     TBAARenderScript->replaceOperandWith(1, TBAARoot);
1366   }
1367 
runOnModule(llvm::Module & Module)1368   virtual bool runOnModule(llvm::Module &Module) {
1369     bool Changed  = false;
1370     this->Module  = &Module;
1371     Context = &Module.getContext();
1372 
1373     buildTypes();
1374 
1375     bcinfo::MetadataExtractor me(&Module);
1376     if (!me.extract()) {
1377       ALOGE("Could not extract metadata from module!");
1378       return false;
1379     }
1380 
1381     mStructExplicitlyPaddedBySlang = (me.getCompilerVersion() >= SlangVersion::N_STRUCT_EXPLICIT_PADDING);
1382 
1383     // Expand forEach_* style kernels.
1384     mExportForEachCount = me.getExportForEachSignatureCount();
1385     mExportForEachNameList = me.getExportForEachNameList();
1386     mExportForEachSignatureList = me.getExportForEachSignatureList();
1387 
1388     for (size_t i = 0; i < mExportForEachCount; ++i) {
1389       const char *name = mExportForEachNameList[i];
1390       uint32_t signature = mExportForEachSignatureList[i];
1391       llvm::Function *kernel = Module.getFunction(name);
1392       if (kernel) {
1393         if (bcinfo::MetadataExtractor::hasForEachSignatureKernel(signature)) {
1394           Changed |= ExpandForEach(kernel, signature);
1395           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1396         } else if (kernel->getReturnType()->isVoidTy()) {
1397           Changed |= ExpandOldStyleForEach(kernel, signature);
1398           kernel->setLinkage(llvm::GlobalValue::InternalLinkage);
1399         } else {
1400           // There are some graphics root functions that are not
1401           // expanded, but that will be called directly. For those
1402           // functions, we can not set the linkage to internal.
1403         }
1404       }
1405     }
1406 
1407     // Process general reduce_* style functions.
1408     const size_t ExportReduceCount = me.getExportReduceCount();
1409     const bcinfo::MetadataExtractor::Reduce *ExportReduceList = me.getExportReduceList();
1410     //   Note that functions can be shared between kernels
1411     FunctionSet PromotedFunctions, ExpandedAccumulators, AccumulatorsForCombiners;
1412 
1413     for (size_t i = 0; i < ExportReduceCount; ++i) {
1414       Changed |= PromoteReduceFunction(ExportReduceList[i].mInitializerName, PromotedFunctions);
1415       Changed |= PromoteReduceFunction(ExportReduceList[i].mCombinerName, PromotedFunctions);
1416       Changed |= PromoteReduceFunction(ExportReduceList[i].mOutConverterName, PromotedFunctions);
1417 
1418       // Accumulator
1419       llvm::Function *accumulator = Module.getFunction(ExportReduceList[i].mAccumulatorName);
1420       bccAssert(accumulator != nullptr);
1421       if (ExpandedAccumulators.insert(accumulator).second)
1422         Changed |= ExpandReduceAccumulator(accumulator,
1423                                            ExportReduceList[i].mSignature,
1424                                            ExportReduceList[i].mInputCount);
1425       if (!ExportReduceList[i].mCombinerName) {
1426         if (AccumulatorsForCombiners.insert(accumulator).second)
1427           Changed |= CreateReduceCombinerFromAccumulator(accumulator);
1428       }
1429     }
1430 
1431     if (gEnableRsTbaa && !allocPointersExposed(Module)) {
1432       connectRenderScriptTBAAMetadata(Module);
1433     }
1434 
1435     return Changed;
1436   }
1437 
getPassName() const1438   virtual const char *getPassName() const {
1439     return "forEach_* and reduce_* function expansion";
1440   }
1441 
1442 }; // end RSKernelExpandPass
1443 
1444 } // end anonymous namespace
1445 
// Out-of-class definition of the pass's static ID member, initialized to 0.
char RSKernelExpandPass::ID = 0;
// File-local registration object for the pass. Following LLVM's RegisterPass
// convention, "kernelexp" is the pass argument and "Kernel Expand Pass" its
// descriptive name.
static llvm::RegisterPass<RSKernelExpandPass> X("kernelexp", "Kernel Expand Pass");
1448 
1449 namespace bcc {
1450 
1451 const char BCC_INDEX_VAR_NAME[] = "rsIndex";
1452 
1453 llvm::ModulePass *
createRSKernelExpandPass(bool pEnableStepOpt)1454 createRSKernelExpandPass(bool pEnableStepOpt) {
1455   return new RSKernelExpandPass(pEnableStepOpt);
1456 }
1457 
1458 } // end namespace bcc
1459