1 /*
2  * Copyright 2010-2012, The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "bcc/Compiler.h"
18 
19 #include <llvm/Analysis/Passes.h>
20 #include <llvm/Analysis/TargetTransformInfo.h>
21 #include <llvm/CodeGen/RegAllocRegistry.h>
22 #include <llvm/IR/LegacyPassManager.h>
23 #include <llvm/IR/Module.h>
24 #include <llvm/Support/TargetRegistry.h>
25 #include <llvm/Support/raw_ostream.h>
26 #include <llvm/IR/DataLayout.h>
27 #include <llvm/Target/TargetSubtargetInfo.h>
28 #include <llvm/Target/TargetMachine.h>
29 #include <llvm/Transforms/IPO.h>
30 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
31 #include <llvm/Transforms/Scalar.h>
32 #include <llvm/Transforms/Vectorize.h>
33 
34 #include "bcc/Assert.h"
35 #include "bcc/Renderscript/RSScript.h"
36 #include "bcc/Renderscript/RSTransforms.h"
37 #include "bcc/Script.h"
38 #include "bcc/Source.h"
39 #include "bcc/Support/CompilerConfig.h"
40 #include "bcc/Support/Log.h"
41 #include "bcc/Support/OutputFile.h"
42 #include "bcinfo/MetadataExtractor.h"
43 #include "rsDefines.h"
44 
45 #include <string>
46 
47 using namespace bcc;
48 
GetErrorString(enum ErrorCode pErrCode)49 const char *Compiler::GetErrorString(enum ErrorCode pErrCode) {
50   switch (pErrCode) {
51   case kSuccess:
52     return "Successfully compiled.";
53   case kInvalidConfigNoTarget:
54     return "Invalid compiler config supplied (getTarget() returns nullptr.) "
55            "(missing call to CompilerConfig::initialize()?)";
56   case kErrCreateTargetMachine:
57     return "Failed to create llvm::TargetMachine.";
58   case kErrSwitchTargetMachine:
59     return  "Failed to switch llvm::TargetMachine.";
60   case kErrNoTargetMachine:
61     return "Failed to compile the script since there's no available "
62            "TargetMachine. (missing call to Compiler::config()?)";
63   case kErrMaterialization:
64     return "Failed to materialize the module.";
65   case kErrInvalidOutputFileState:
66     return "Supplied output file was invalid (in the error state.)";
67   case kErrPrepareOutput:
68     return "Failed to prepare file for output.";
69   case kPrepareCodeGenPass:
70     return "Failed to construct pass list for code-generation.";
71   case kErrCustomPasses:
72     return "Error occurred while adding custom passes.";
73   case kErrInvalidSource:
74     return "Error loading input bitcode";
75   case kIllegalGlobalFunction:
76     return "Use of undefined external function";
77   }
78 
79   // This assert should never be reached as the compiler verifies that the
80   // above switch coveres all enum values.
81   assert(false && "Unknown error code encountered");
82   return  "";
83 }
84 
85 //===----------------------------------------------------------------------===//
86 // Instance Methods
87 //===----------------------------------------------------------------------===//
Compiler()88 Compiler::Compiler() : mTarget(nullptr), mEnableOpt(true) {
89   return;
90 }
91 
Compiler(const CompilerConfig & pConfig)92 Compiler::Compiler(const CompilerConfig &pConfig) : mTarget(nullptr),
93                                                     mEnableOpt(true) {
94   const std::string &triple = pConfig.getTriple();
95 
96   enum ErrorCode err = config(pConfig);
97   if (err != kSuccess) {
98     ALOGE("%s (%s, features: %s)", GetErrorString(err),
99           triple.c_str(), pConfig.getFeatureString().c_str());
100     return;
101   }
102 
103   return;
104 }
105 
config(const CompilerConfig & pConfig)106 enum Compiler::ErrorCode Compiler::config(const CompilerConfig &pConfig) {
107   if (pConfig.getTarget() == nullptr) {
108     return kInvalidConfigNoTarget;
109   }
110 
111   llvm::TargetMachine *new_target =
112       (pConfig.getTarget())->createTargetMachine(pConfig.getTriple(),
113                                                  pConfig.getCPU(),
114                                                  pConfig.getFeatureString(),
115                                                  pConfig.getTargetOptions(),
116                                                  pConfig.getRelocationModel(),
117                                                  pConfig.getCodeModel(),
118                                                  pConfig.getOptimizationLevel());
119 
120   if (new_target == nullptr) {
121     return ((mTarget != nullptr) ? kErrSwitchTargetMachine :
122                                    kErrCreateTargetMachine);
123   }
124 
125   // Replace the old TargetMachine.
126   delete mTarget;
127   mTarget = new_target;
128 
129   // Adjust register allocation policy according to the optimization level.
130   //  createFastRegisterAllocator: fast but bad quality
131   //  createLinearScanRegisterAllocator: not so fast but good quality
132   if ((pConfig.getOptimizationLevel() == llvm::CodeGenOpt::None)) {
133     llvm::RegisterRegAlloc::setDefault(llvm::createFastRegisterAllocator);
134   } else {
135     llvm::RegisterRegAlloc::setDefault(llvm::createGreedyRegisterAllocator);
136   }
137 
138   return kSuccess;
139 }
140 
~Compiler()141 Compiler::~Compiler() {
142   delete mTarget;
143 }
144 
145 
runPasses(Script & pScript,llvm::raw_pwrite_stream & pResult)146 enum Compiler::ErrorCode Compiler::runPasses(Script &pScript,
147                                              llvm::raw_pwrite_stream &pResult) {
148   // Pass manager for link-time optimization
149   llvm::legacy::PassManager passes;
150 
151   // Empty MCContext.
152   llvm::MCContext *mc_context = nullptr;
153 
154   passes.add(createTargetTransformInfoWrapperPass(mTarget->getTargetIRAnalysis()));
155 
156   // Add our custom passes.
157   if (!addCustomPasses(pScript, passes)) {
158     return kErrCustomPasses;
159   }
160 
161   if (mTarget->getOptLevel() == llvm::CodeGenOpt::None) {
162     passes.add(llvm::createGlobalOptimizerPass());
163     passes.add(llvm::createConstantMergePass());
164 
165   } else {
166     // FIXME: Figure out which passes should be executed.
167     llvm::PassManagerBuilder Builder;
168     Builder.Inliner = llvm::createFunctionInliningPass();
169     Builder.populateLTOPassManager(passes);
170 
171     /* FIXME: Reenable autovectorization after rebase.
172        bug 19324423
173     // Add vectorization passes after LTO passes are in
174     // additional flag: -unroll-runtime
175     passes.add(llvm::createLoopUnrollPass(-1, 16, 0, 1));
176     // Need to pass appropriate flags here: -scalarize-load-store
177     passes.add(llvm::createScalarizerPass());
178     passes.add(llvm::createCFGSimplificationPass());
179     passes.add(llvm::createScopedNoAliasAAPass());
180     passes.add(llvm::createScalarEvolutionAliasAnalysisPass());
181     // additional flags: -slp-vectorize-hor -slp-vectorize-hor-store (unnecessary?)
182     passes.add(llvm::createSLPVectorizerPass());
183     passes.add(llvm::createDeadCodeEliminationPass());
184     passes.add(llvm::createInstructionCombiningPass());
185     */
186   }
187 
188   // These passes have to come after LTO, since we don't want to examine
189   // functions that are never actually called.
190   if (!addPostLTOCustomPasses(passes)) {
191     return kErrCustomPasses;
192   }
193 
194   // RSEmbedInfoPass needs to come after we have scanned for non-threadable
195   // functions.
196   // Script passed to RSCompiler must be a RSScript.
197   RSScript &script = static_cast<RSScript &>(pScript);
198   if (script.getEmbedInfo())
199     passes.add(createRSEmbedInfoPass());
200 
201   // Add passes to the pass manager to emit machine code through MC layer.
202   if (mTarget->addPassesToEmitMC(passes, mc_context, pResult,
203                                  /* DisableVerify */false)) {
204     return kPrepareCodeGenPass;
205   }
206 
207   // Execute the passes.
208   passes.run(pScript.getSource().getModule());
209 
210   return kSuccess;
211 }
212 
compile(Script & pScript,llvm::raw_pwrite_stream & pResult,llvm::raw_ostream * IRStream)213 enum Compiler::ErrorCode Compiler::compile(Script &pScript,
214                                            llvm::raw_pwrite_stream &pResult,
215                                            llvm::raw_ostream *IRStream) {
216   llvm::Module &module = pScript.getSource().getModule();
217   enum ErrorCode err;
218 
219   if (mTarget == nullptr) {
220     return kErrNoTargetMachine;
221   }
222 
223   const std::string &triple = module.getTargetTriple();
224   const llvm::DataLayout *dl = getTargetMachine().getDataLayout();
225   unsigned int pointerSize = dl->getPointerSizeInBits();
226   if (triple == "armv7-none-linux-gnueabi") {
227     if (pointerSize != 32) {
228       return kErrInvalidSource;
229     }
230   } else if (triple == "aarch64-none-linux-gnueabi") {
231     if (pointerSize != 64) {
232       return kErrInvalidSource;
233     }
234   } else {
235     return kErrInvalidSource;
236   }
237 
238   // Materialize the bitcode module.
239   if (module.getMaterializer() != nullptr) {
240     // A module with non-null materializer means that it is a lazy-load module.
241     // Materialize it now via invoking MaterializeAllPermanently(). This
242     // function returns false when the materialization is successful.
243     std::error_code ec = module.materializeAllPermanently();
244     if (ec) {
245       ALOGE("Failed to materialize the module `%s'! (%s)",
246             module.getModuleIdentifier().c_str(), ec.message().c_str());
247       return kErrMaterialization;
248     }
249   }
250 
251   if ((err = runPasses(pScript, pResult)) != kSuccess) {
252     return err;
253   }
254 
255   if (IRStream) {
256     *IRStream << module;
257   }
258 
259   return kSuccess;
260 }
261 
compile(Script & pScript,OutputFile & pResult,llvm::raw_ostream * IRStream)262 enum Compiler::ErrorCode Compiler::compile(Script &pScript,
263                                            OutputFile &pResult,
264                                            llvm::raw_ostream *IRStream) {
265   // Check the state of the specified output file.
266   if (pResult.hasError()) {
267     return kErrInvalidOutputFileState;
268   }
269 
270   // Open the output file decorated in llvm::raw_ostream.
271   llvm::raw_pwrite_stream *out = pResult.dup();
272   if (out == nullptr) {
273     return kErrPrepareOutput;
274   }
275 
276   // Delegate the request.
277   enum Compiler::ErrorCode err = compile(pScript, *out, IRStream);
278 
279   // Close the output before return.
280   delete out;
281 
282   return err;
283 }
284 
addInternalizeSymbolsPass(Script & pScript,llvm::legacy::PassManager & pPM)285 bool Compiler::addInternalizeSymbolsPass(Script &pScript, llvm::legacy::PassManager &pPM) {
286   // Add a pass to internalize the symbols that don't need to have global
287   // visibility.
288   RSScript &script = static_cast<RSScript &>(pScript);
289   llvm::Module &module = script.getSource().getModule();
290   bcinfo::MetadataExtractor me(&module);
291   if (!me.extract()) {
292     bccAssert(false && "Could not extract metadata for module!");
293     return false;
294   }
295 
296   // The vector contains the symbols that should not be internalized.
297   std::vector<const char *> export_symbols;
298 
299   const char *sf[] = {
300     kRoot,               // Graphics drawing function or compute kernel.
301     kInit,               // Initialization routine called implicitly on startup.
302     kRsDtor,             // Static global destructor for a script instance.
303     kRsInfo,             // Variable containing string of RS metadata info.
304     kRsGlobalEntries,    // Optional number of global variables.
305     kRsGlobalNames,      // Optional global variable name info.
306     kRsGlobalAddresses,  // Optional global variable address info.
307     kRsGlobalSizes,      // Optional global variable size info.
308     kRsGlobalProperties, // Optional global variable properties.
309     nullptr              // Must be nullptr-terminated.
310   };
311   const char **special_functions = sf;
312   // Special RS functions should always be global symbols.
313   while (*special_functions != nullptr) {
314     export_symbols.push_back(*special_functions);
315     special_functions++;
316   }
317 
318   // Visibility of symbols appeared in rs_export_var and rs_export_func should
319   // also be preserved.
320   size_t exportVarCount = me.getExportVarCount();
321   size_t exportFuncCount = me.getExportFuncCount();
322   size_t exportForEachCount = me.getExportForEachSignatureCount();
323   const char **exportVarNameList = me.getExportVarNameList();
324   const char **exportFuncNameList = me.getExportFuncNameList();
325   const char **exportForEachNameList = me.getExportForEachNameList();
326   size_t i;
327 
328   for (i = 0; i < exportVarCount; ++i) {
329     export_symbols.push_back(exportVarNameList[i]);
330   }
331 
332   for (i = 0; i < exportFuncCount; ++i) {
333     export_symbols.push_back(exportFuncNameList[i]);
334   }
335 
336   // Expanded foreach functions should not be internalized, too.
337   // expanded_foreach_funcs keeps the .expand version of the kernel names
338   // around until createInternalizePass() is finished making its own
339   // copy of the visible symbols.
340   std::vector<std::string> expanded_foreach_funcs;
341   for (i = 0; i < exportForEachCount; ++i) {
342     expanded_foreach_funcs.push_back(
343         std::string(exportForEachNameList[i]) + ".expand");
344   }
345 
346   for (i = 0; i < exportForEachCount; i++) {
347       export_symbols.push_back(expanded_foreach_funcs[i].c_str());
348   }
349 
350   pPM.add(llvm::createInternalizePass(export_symbols));
351 
352   return true;
353 }
354 
addInvokeHelperPass(llvm::legacy::PassManager & pPM)355 bool Compiler::addInvokeHelperPass(llvm::legacy::PassManager &pPM) {
356   llvm::Triple arch(getTargetMachine().getTargetTriple());
357   if (arch.isArch64Bit()) {
358     pPM.add(createRSInvokeHelperPass());
359   }
360   return true;
361 }
362 
addExpandForEachPass(Script & pScript,llvm::legacy::PassManager & pPM)363 bool Compiler::addExpandForEachPass(Script &pScript, llvm::legacy::PassManager &pPM) {
364   // Expand ForEach on CPU path to reduce launch overhead.
365   bool pEnableStepOpt = true;
366   pPM.add(createRSForEachExpandPass(pEnableStepOpt));
367 
368   return true;
369 }
370 
addGlobalInfoPass(Script & pScript,llvm::legacy::PassManager & pPM)371 bool Compiler::addGlobalInfoPass(Script &pScript, llvm::legacy::PassManager &pPM) {
372   // Add additional information about RS global variables inside the Module.
373   RSScript &script = static_cast<RSScript &>(pScript);
374   if (script.getEmbedGlobalInfo()) {
375     pPM.add(createRSGlobalInfoPass(script.getEmbedGlobalInfoSkipConstant()));
376   }
377 
378   return true;
379 }
380 
addInvariantPass(llvm::legacy::PassManager & pPM)381 bool Compiler::addInvariantPass(llvm::legacy::PassManager &pPM) {
382   // Mark Loads from RsExpandKernelDriverInfo as "load.invariant".
383   // Should run after ExpandForEach and before inlining.
384   pPM.add(createRSInvariantPass());
385 
386   return true;
387 }
388 
addCustomPasses(Script & pScript,llvm::legacy::PassManager & pPM)389 bool Compiler::addCustomPasses(Script &pScript, llvm::legacy::PassManager &pPM) {
390   if (!addInvokeHelperPass(pPM))
391     return false;
392 
393   if (!addExpandForEachPass(pScript, pPM))
394     return false;
395 
396   if (!addInvariantPass(pPM))
397     return false;
398 
399   if (!addInternalizeSymbolsPass(pScript, pPM))
400     return false;
401 
402   if (!addGlobalInfoPass(pScript, pPM))
403     return false;
404 
405   return true;
406 }
407 
addPostLTOCustomPasses(llvm::legacy::PassManager & pPM)408 bool Compiler::addPostLTOCustomPasses(llvm::legacy::PassManager &pPM) {
409   // Add pass to correct calling convention for X86-64.
410   llvm::Triple arch(getTargetMachine().getTargetTriple());
411   if (arch.getArch() == llvm::Triple::x86_64)
412     pPM.add(createRSX86_64CallConvPass());
413 
414   // Add pass to mark script as threadable.
415   pPM.add(createRSIsThreadablePass());
416 
417   return true;
418 }
419 
screenGlobalFunctions(Script & pScript)420 enum Compiler::ErrorCode Compiler::screenGlobalFunctions(Script &pScript) {
421   llvm::Module &module = pScript.getSource().getModule();
422 
423   // Materialize the bitcode module in case this is a lazy-load module.  Do not
424   // clear the materializer by calling materializeAllPermanently since the
425   // runtime library has not been merged into the module yet.
426   if (module.getMaterializer() != nullptr) {
427     std::error_code ec = module.materializeAll();
428     if (ec) {
429       ALOGE("Failed to materialize module `%s' when screening globals! (%s)",
430             module.getModuleIdentifier().c_str(), ec.message().c_str());
431       return kErrMaterialization;
432     }
433   }
434 
435   // Add pass to check for illegal function calls.
436   llvm::legacy::PassManager pPM;
437   pPM.add(createRSScreenFunctionsPass());
438   pPM.run(module);
439 
440   return kSuccess;
441 
442 }
443