1 //===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the ValueEnumerator class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "ValueEnumerator.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/ADT/SmallPtrSet.h"
17 #include "llvm/IR/Constants.h"
18 #include "llvm/IR/DebugInfoMetadata.h"
19 #include "llvm/IR/DerivedTypes.h"
20 #include "llvm/IR/Instructions.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/IR/ValueSymbolTable.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/raw_ostream.h"
25 #include <algorithm>
26 using namespace llvm;
27 
28 namespace llvm_2_9 {
29 
isIntOrIntVectorValue(const std::pair<const Value *,unsigned> & V)30 static bool isIntOrIntVectorValue(const std::pair<const Value*, unsigned> &V) {
31   return V.first->getType()->isIntOrIntVectorTy();
32 }
33 
34 /// ValueEnumerator - Enumerate module-level information.
ValueEnumerator(const llvm::Module & M)35 ValueEnumerator::ValueEnumerator(const llvm::Module &M)
36     : HasMDString(false), HasDILocation(false) {
37   // Enumerate the global variables.
38   for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end();
39        I != E; ++I)
40     EnumerateValue(&*I);
41 
42   // Enumerate the functions.
43   for (llvm::Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
44     EnumerateValue(&*I);
45     EnumerateAttributes(cast<Function>(I)->getAttributes());
46   }
47 
48   // Enumerate the aliases.
49   for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
50        I != E; ++I)
51     EnumerateValue(&*I);
52 
53   // Remember what is the cutoff between globalvalue's and other constants.
54   unsigned FirstConstant = Values.size();
55 
56   // Enumerate the global variable initializers.
57   for (llvm::Module::const_global_iterator I = M.global_begin(), E = M.global_end();
58        I != E; ++I)
59     if (I->hasInitializer())
60       EnumerateValue(I->getInitializer());
61 
62   // Enumerate the aliasees.
63   for (llvm::Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
64        I != E; ++I)
65     EnumerateValue(I->getAliasee());
66 
67   // Enumerate the metadata type.
68   //
69   // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
70   // only encodes the metadata type when it's used as a value.
71   EnumerateType(Type::getMetadataTy(M.getContext()));
72 
73   // Insert constants and metadata that are named at module level into the slot
74   // pool so that the module symbol table can refer to them...
75   EnumerateValueSymbolTable(M.getValueSymbolTable());
76   EnumerateNamedMetadata(M);
77 
78   SmallVector<std::pair<unsigned, MDNode *>, 8> MDs;
79 
80   // Enumerate types used by function bodies and argument lists.
81   for (const Function &F : M) {
82     for (const Argument &A : F.args())
83       EnumerateType(A.getType());
84 
85     for (const BasicBlock &BB : F)
86       for (const Instruction &I : BB) {
87         for (const Use &Op : I.operands()) {
88           auto *MD = dyn_cast<MetadataAsValue>(&Op);
89           if (!MD) {
90             EnumerateOperandType(Op);
91             continue;
92           }
93 
94           // Local metadata is enumerated during function-incorporation.
95           if (isa<LocalAsMetadata>(MD->getMetadata()))
96             continue;
97 
98           EnumerateMetadata(MD->getMetadata());
99         }
100         EnumerateType(I.getType());
101         if (const CallInst *CI = dyn_cast<CallInst>(&I))
102           EnumerateAttributes(CI->getAttributes());
103         else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I))
104           EnumerateAttributes(II->getAttributes());
105 
106         // Enumerate metadata attached with this instruction.
107         MDs.clear();
108         I.getAllMetadataOtherThanDebugLoc(MDs);
109         for (unsigned i = 0, e = MDs.size(); i != e; ++i)
110           EnumerateMetadata(MDs[i].second);
111 
112         // Don't enumerate the location directly -- it has a special record
113         // type -- but enumerate its operands.
114         if (DILocation *L = I.getDebugLoc())
115           EnumerateMDNodeOperands(L);
116       }
117   }
118 
119   // Optimize constant ordering.
120   OptimizeConstants(FirstConstant, Values.size());
121 }
122 
getInstructionID(const Instruction * Inst) const123 unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
124   InstructionMapType::const_iterator I = InstructionMap.find(Inst);
125   assert(I != InstructionMap.end() && "Instruction is not mapped!");
126   return I->second;
127 }
128 
setInstructionID(const Instruction * I)129 void ValueEnumerator::setInstructionID(const Instruction *I) {
130   InstructionMap[I] = InstructionCount++;
131 }
132 
getValueID(const Value * V) const133 unsigned ValueEnumerator::getValueID(const Value *V) const {
134   if (auto *MD = dyn_cast<MetadataAsValue>(V))
135     return getMetadataID(MD->getMetadata());
136 
137   ValueMapType::const_iterator I = ValueMap.find(V);
138   assert(I != ValueMap.end() && "Value not in slotcalculator!");
139   return I->second-1;
140 }
141 
dump() const142 void ValueEnumerator::dump() const {
143   print(dbgs(), ValueMap, "Default");
144   dbgs() << '\n';
145   print(dbgs(), MDValueMap, "MetaData");
146   dbgs() << '\n';
147 }
148 
print(raw_ostream & OS,const ValueMapType & Map,const char * Name) const149 void ValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map,
150                             const char *Name) const {
151 
152   OS << "Map Name: " << Name << "\n";
153   OS << "Size: " << Map.size() << "\n";
154   for (ValueMapType::const_iterator I = Map.begin(),
155          E = Map.end(); I != E; ++I) {
156 
157     const Value *V = I->first;
158     if (V->hasName())
159       OS << "Value: " << V->getName();
160     else
161       OS << "Value: [null]\n";
162     V->dump();
163 
164     OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):";
165     for (const Use &U : V->uses()) {
166       if (&U != &*V->use_begin())
167         OS << ",";
168       if(U->hasName())
169         OS << " " << U->getName();
170       else
171         OS << " [null]";
172 
173     }
174     OS <<  "\n\n";
175   }
176 }
177 
print(llvm::raw_ostream & OS,const MetadataMapType & Map,const char * Name) const178 void ValueEnumerator::print(llvm::raw_ostream &OS, const MetadataMapType &Map,
179                             const char *Name) const {
180 
181   OS << "Map Name: " << Name << "\n";
182   OS << "Size: " << Map.size() << "\n";
183   for (auto I = Map.begin(), E = Map.end(); I != E; ++I) {
184     const llvm::Metadata *MD = I->first;
185     OS << "Metadata: slot = " << I->second << "\n";
186     MD->print(OS);
187   }
188 }
189 
190 
191 // Optimize constant ordering.
192 namespace {
193   struct CstSortPredicate {
194     ValueEnumerator &VE;
CstSortPredicatellvm_2_9::__anon7f18594f0111::CstSortPredicate195     explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {}
operator ()llvm_2_9::__anon7f18594f0111::CstSortPredicate196     bool operator()(const std::pair<const Value*, unsigned> &LHS,
197                     const std::pair<const Value*, unsigned> &RHS) {
198       // Sort by plane.
199       if (LHS.first->getType() != RHS.first->getType())
200         return VE.getTypeID(LHS.first->getType()) <
201                VE.getTypeID(RHS.first->getType());
202       // Then by frequency.
203       return LHS.second > RHS.second;
204     }
205   };
206 }
207 
208 /// OptimizeConstants - Reorder constant pool for denser encoding.
OptimizeConstants(unsigned CstStart,unsigned CstEnd)209 void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
210   if (CstStart == CstEnd || CstStart+1 == CstEnd) return;
211 
212   CstSortPredicate P(*this);
213   std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P);
214 
215   // Ensure that integer and vector of integer constants are at the start of the
216   // constant pool.  This is important so that GEP structure indices come before
217   // gep constant exprs.
218   std::partition(Values.begin()+CstStart, Values.begin()+CstEnd,
219                  isIntOrIntVectorValue);
220 
221   // Rebuild the modified portion of ValueMap.
222   for (; CstStart != CstEnd; ++CstStart)
223     ValueMap[Values[CstStart].first] = CstStart+1;
224 }
225 
226 
227 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
228 /// table into the values table.
EnumerateValueSymbolTable(const ValueSymbolTable & VST)229 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
230   for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end();
231        VI != VE; ++VI)
232     EnumerateValue(VI->getValue());
233 }
234 
235 /// EnumerateNamedMetadata - Insert all of the values referenced by
236 /// named metadata in the specified module.
EnumerateNamedMetadata(const llvm::Module & M)237 void ValueEnumerator::EnumerateNamedMetadata(const llvm::Module &M) {
238   for (llvm::Module::const_named_metadata_iterator I = M.named_metadata_begin(),
239                                              E = M.named_metadata_end();
240        I != E; ++I)
241     EnumerateNamedMDNode(&*I);
242 }
243 
EnumerateNamedMDNode(const NamedMDNode * MD)244 void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) {
245   for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i)
246     EnumerateMetadata(MD->getOperand(i));
247 }
248 
249 /// EnumerateMDNodeOperands - Enumerate all non-function-local values
250 /// and types referenced by the given MDNode.
EnumerateMDNodeOperands(const MDNode * N)251 void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
252   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
253     Metadata *MD = N->getOperand(i);
254     if (!MD)
255       continue;
256     assert(!isa<LocalAsMetadata>(MD) && "MDNodes cannot be function-local");
257     EnumerateMetadata(MD);
258   }
259 }
260 
EnumerateMetadata(const llvm::Metadata * MD)261 void ValueEnumerator::EnumerateMetadata(const llvm::Metadata *MD) {
262   assert(
263       (isa<MDNode>(MD) || isa<MDString>(MD) || isa<ConstantAsMetadata>(MD)) &&
264       "Invalid metadata kind");
265 
266   // Insert a dummy ID to block the co-recursive call to
267   // EnumerateMDNodeOperands() from re-visiting MD in a cyclic graph.
268   //
269   // Return early if there's already an ID.
270   if (!MDValueMap.insert(std::make_pair(MD, 0)).second)
271     return;
272 
273   // Visit operands first to minimize RAUW.
274   if (auto *N = dyn_cast<MDNode>(MD))
275     EnumerateMDNodeOperands(N);
276   else if (auto *C = dyn_cast<ConstantAsMetadata>(MD))
277     EnumerateValue(C->getValue());
278 
279   HasMDString |= isa<MDString>(MD);
280   HasDILocation |= isa<DILocation>(MD);
281 
282   // Replace the dummy ID inserted above with the correct one.  MDValueMap may
283   // have changed by inserting operands, so we need a fresh lookup here.
284   MDs.push_back(MD);
285   MDValueMap[MD] = MDs.size();
286 }
287 
288 /// EnumerateFunctionLocalMetadataa - Incorporate function-local metadata
289 /// information reachable from the metadata.
EnumerateFunctionLocalMetadata(const llvm::LocalAsMetadata * Local)290 void ValueEnumerator::EnumerateFunctionLocalMetadata(
291     const llvm::LocalAsMetadata *Local) {
292   // Check to see if it's already in!
293   unsigned &MDValueID = MDValueMap[Local];
294   if (MDValueID)
295     return;
296 
297   MDs.push_back(Local);
298   MDValueID = MDs.size();
299 
300   EnumerateValue(Local->getValue());
301 
302   // Also, collect all function-local metadata for easy access.
303   FunctionLocalMDs.push_back(Local);
304 }
305 
EnumerateValue(const Value * V)306 void ValueEnumerator::EnumerateValue(const Value *V) {
307   assert(!V->getType()->isVoidTy() && "Can't insert void values!");
308   assert(!isa<MetadataAsValue>(V) && "EnumerateValue doesn't handle Metadata!");
309 
310   // Check to see if it's already in!
311   unsigned &ValueID = ValueMap[V];
312   if (ValueID) {
313     // Increment use count.
314     Values[ValueID-1].second++;
315     return;
316   }
317 
318   // Enumerate the type of this value.
319   EnumerateType(V->getType());
320 
321   if (const Constant *C = dyn_cast<Constant>(V)) {
322     if (isa<GlobalValue>(C)) {
323       // Initializers for globals are handled explicitly elsewhere.
324     } else if (C->getNumOperands()) {
325       // If a constant has operands, enumerate them.  This makes sure that if a
326       // constant has uses (for example an array of const ints), that they are
327       // inserted also.
328 
329       // We prefer to enumerate them with values before we enumerate the user
330       // itself.  This makes it more likely that we can avoid forward references
331       // in the reader.  We know that there can be no cycles in the constants
332       // graph that don't go through a global variable.
333       for (User::const_op_iterator I = C->op_begin(), E = C->op_end();
334            I != E; ++I)
335         if (!isa<BasicBlock>(*I)) // Don't enumerate BB operand to BlockAddress.
336           EnumerateValue(*I);
337 
338       // Finally, add the value.  Doing this could make the ValueID reference be
339       // dangling, don't reuse it.
340       Values.push_back(std::make_pair(V, 1U));
341       ValueMap[V] = Values.size();
342       return;
343     } else if (const ConstantDataSequential *CDS =
344                dyn_cast<ConstantDataSequential>(C)) {
345       // For our legacy handling of the new ConstantDataSequential type, we
346       // need to enumerate the individual elements, as well as mark the
347       // outer constant as used.
348       for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
349         EnumerateValue(CDS->getElementAsConstant(i));
350       Values.push_back(std::make_pair(V, 1U));
351       ValueMap[V] = Values.size();
352       return;
353     }
354   }
355 
356   // Add the value.
357   Values.push_back(std::make_pair(V, 1U));
358   ValueID = Values.size();
359 }
360 
361 
EnumerateType(Type * Ty)362 void ValueEnumerator::EnumerateType(Type *Ty) {
363   unsigned *TypeID = &TypeMap[Ty];
364 
365   // We've already seen this type.
366   if (*TypeID)
367     return;
368 
369   // If it is a non-anonymous struct, mark the type as being visited so that we
370   // don't recursively visit it.  This is safe because we allow forward
371   // references of these in the bitcode reader.
372   if (StructType *STy = dyn_cast<StructType>(Ty))
373     if (!STy->isLiteral())
374       *TypeID = ~0U;
375 
376   // Enumerate all of the subtypes before we enumerate this type.  This ensures
377   // that the type will be enumerated in an order that can be directly built.
378   for (Type *SubTy : Ty->subtypes())
379     EnumerateType(SubTy);
380 
381   // Refresh the TypeID pointer in case the table rehashed.
382   TypeID = &TypeMap[Ty];
383 
384   // Check to see if we got the pointer another way.  This can happen when
385   // enumerating recursive types that hit the base case deeper than they start.
386   //
387   // If this is actually a struct that we are treating as forward ref'able,
388   // then emit the definition now that all of its contents are available.
389   if (*TypeID && *TypeID != ~0U)
390     return;
391 
392   // Add this type now that its contents are all happily enumerated.
393   Types.push_back(Ty);
394 
395   *TypeID = Types.size();
396 }
397 
398 // Enumerate the types for the specified value.  If the value is a constant,
399 // walk through it, enumerating the types of the constant.
EnumerateOperandType(const Value * V)400 void ValueEnumerator::EnumerateOperandType(const Value *V) {
401   EnumerateType(V->getType());
402 
403   if (auto *MD = dyn_cast<MetadataAsValue>(V)) {
404     assert(!isa<LocalAsMetadata>(MD->getMetadata()) &&
405            "Function-local metadata should be left for later");
406 
407     EnumerateMetadata(MD->getMetadata());
408     return;
409   }
410 
411   const Constant *C = dyn_cast<Constant>(V);
412   if (!C)
413     return;
414 
415   // If this constant is already enumerated, ignore it, we know its type must
416   // be enumerated.
417   if (ValueMap.count(C))
418     return;
419 
420   // This constant may have operands, make sure to enumerate the types in
421   // them.
422   for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
423     const Value *Op = C->getOperand(i);
424 
425     // Don't enumerate basic blocks here, this happens as operands to
426     // blockaddress.
427     if (isa<BasicBlock>(Op))
428       continue;
429 
430     EnumerateOperandType(Op);
431   }
432 }
433 
EnumerateAttributes(AttributeSet PAL)434 void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) {
435   if (PAL.isEmpty()) return;  // null is always 0.
436 
437   // Do a lookup.
438   unsigned &Entry = AttributeMap[PAL];
439   if (Entry == 0) {
440     // Never saw this before, add it.
441     Attribute.push_back(PAL);
442     Entry = Attribute.size();
443   }
444 
445   // Do lookups for all attribute groups.
446   for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) {
447     AttributeSet AS = PAL.getSlotAttributes(i);
448     unsigned &Entry = AttributeGroupMap[AS];
449     if (Entry == 0) {
450       AttributeGroups.push_back(AS);
451       Entry = AttributeGroups.size();
452     }
453   }
454 }
455 
incorporateFunction(const Function & F)456 void ValueEnumerator::incorporateFunction(const Function &F) {
457   InstructionCount = 0;
458   NumModuleValues = Values.size();
459   NumModuleMDs = MDs.size();
460 
461   // Adding function arguments to the value table.
462   for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
463        I != E; ++I)
464     EnumerateValue(&*I);
465 
466   FirstFuncConstantID = Values.size();
467 
468   // Add all function-level constants to the value table.
469   for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
470     for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I)
471       for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
472            OI != E; ++OI) {
473         if ((isa<Constant>(*OI) && !isa<GlobalValue>(*OI)) ||
474             isa<InlineAsm>(*OI))
475           EnumerateValue(*OI);
476       }
477     BasicBlocks.push_back(&*BB);
478     ValueMap[&*BB] = BasicBlocks.size();
479   }
480 
481   // Optimize the constant layout.
482   OptimizeConstants(FirstFuncConstantID, Values.size());
483 
484   // Add the function's parameter attributes so they are available for use in
485   // the function's instruction.
486   EnumerateAttributes(F.getAttributes());
487 
488   FirstInstID = Values.size();
489 
490   SmallVector<llvm::LocalAsMetadata *, 8> FnLocalMDVector;
491   // Add all of the instructions.
492   for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
493     for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
494       for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
495            OI != E; ++OI) {
496         if (auto *MD = dyn_cast<llvm::MetadataAsValue>(&*OI))
497           if (auto *Local = dyn_cast<LocalAsMetadata>(MD->getMetadata()))
498             // Enumerate metadata after the instructions they might refer to.
499             FnLocalMDVector.push_back(Local);
500       }
501 
502       if (!I->getType()->isVoidTy())
503         EnumerateValue(&*I);
504     }
505   }
506 
507   // Add all of the function-local metadata.
508   for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
509     EnumerateFunctionLocalMetadata(FnLocalMDVector[i]);
510 }
511 
purgeFunction()512 void ValueEnumerator::purgeFunction() {
513   /// Remove purged values from the ValueMap.
514   for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i)
515     ValueMap.erase(Values[i].first);
516   for (unsigned i = NumModuleMDs, e = MDs.size(); i != e; ++i)
517     MDValueMap.erase(MDs[i]);
518   for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
519     ValueMap.erase(BasicBlocks[i]);
520 
521   Values.resize(NumModuleValues);
522   MDs.resize(NumModuleMDs);
523   BasicBlocks.clear();
524   FunctionLocalMDs.clear();
525 }
526 
IncorporateFunctionInfoGlobalBBIDs(const Function * F,DenseMap<const BasicBlock *,unsigned> & IDMap)527 static void IncorporateFunctionInfoGlobalBBIDs(const Function *F,
528                                  DenseMap<const BasicBlock*, unsigned> &IDMap) {
529   unsigned Counter = 0;
530   for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
531     IDMap[&*BB] = ++Counter;
532 }
533 
534 /// getGlobalBasicBlockID - This returns the function-specific ID for the
535 /// specified basic block.  This is relatively expensive information, so it
536 /// should only be used by rare constructs such as address-of-label.
getGlobalBasicBlockID(const BasicBlock * BB) const537 unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const {
538   unsigned &Idx = GlobalBasicBlockIDs[BB];
539   if (Idx != 0)
540     return Idx-1;
541 
542   IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs);
543   return getGlobalBasicBlockID(BB);
544 }
545 
546 } // end llvm_2_9 namespace
547