//===- IdenticalCodeFolding.cpp -------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
#include "mcld/LD/IdenticalCodeFolding.h"

#include "mcld/GeneralOptions.h"
#include "mcld/Module.h"
#include "mcld/Fragment/RegionFragment.h"
#include "mcld/LD/LDContext.h"
#include "mcld/LD/LDSection.h"
#include "mcld/LD/RelocData.h"
#include "mcld/LD/Relocator.h"
#include "mcld/LD/ResolveInfo.h"
#include "mcld/LD/SectionData.h"
#include "mcld/LinkerConfig.h"
#include "mcld/MC/Input.h"
#include "mcld/Support/Demangle.h"
#include "mcld/Support/MsgHandling.h"
#include "mcld/Target/GNULDBackend.h"

#include <llvm/ADT/StringRef.h>
#include <llvm/Support/Casting.h>
#include <llvm/Support/Format.h>

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include <zlib.h>
35 
36 namespace mcld {
37 
isSymCtorOrDtor(const ResolveInfo & pSym)38 static bool isSymCtorOrDtor(const ResolveInfo& pSym) {
39   // We can always fold ctors and dtors since accessing function pointer in C++
40   // is forbidden.
41   llvm::StringRef name(pSym.name(), pSym.nameSize());
42   if (!name.startswith("_ZZ") && !name.startswith("_ZN")) {
43     return false;
44   }
45   return isCtorOrDtor(pSym.name(), pSym.nameSize());
46 }
47 
IdenticalCodeFolding(const LinkerConfig & pConfig,const TargetLDBackend & pBackend,Module & pModule)48 IdenticalCodeFolding::IdenticalCodeFolding(const LinkerConfig& pConfig,
49                                            const TargetLDBackend& pBackend,
50                                            Module& pModule)
51     : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule) {
52 }
53 
foldIdenticalCode()54 void IdenticalCodeFolding::foldIdenticalCode() {
55   // 1. Find folding candidates.
56   FoldingCandidates candidate_list;
57   findCandidates(candidate_list);
58 
59   // 2. Initialize constant section content
60   for (size_t i = 0; i < candidate_list.size(); ++i) {
61     candidate_list[i].initConstantContent(m_Backend, m_KeptSections);
62   }
63 
64   // 3. Find identical code until convergence
65   bool converged = false;
66   size_t iterations = 0;
67   while (!converged && (iterations < m_Config.options().getICFIterations())) {
68     converged = matchCandidates(candidate_list);
69     ++iterations;
70   }
71   if (m_Config.options().printICFSections()) {
72     debug(diag::debug_icf_iterations) << iterations;
73   }
74 
75   // 4. Fold the identical code
76   typedef std::set<Input*> FoldedObjects;
77   FoldedObjects folded_objs;
78   KeptSections::iterator kept, keptEnd = m_KeptSections.end();
79   size_t index = 0;
80   for (kept = m_KeptSections.begin(); kept != keptEnd; ++kept, ++index) {
81     LDSection* sect = (*kept).first;
82     Input* obj = (*kept).second.first;
83     size_t kept_index = (*kept).second.second;
84     if (index != kept_index) {
85       sect->setKind(LDFileFormat::Folded);
86       folded_objs.insert(obj);
87 
88       if (m_Config.options().printICFSections()) {
89         KeptSections::iterator it = m_KeptSections.begin() + kept_index;
90         LDSection* kept_sect = (*it).first;
91         Input* kept_obj = (*it).second.first;
92         debug(diag::debug_icf_folded_section) << sect->name() << obj->name()
93                                               << kept_sect->name()
94                                               << kept_obj->name();
95       }
96     }
97   }
98 
99   // Adjust the fragment reference of the folded symbols.
100   FoldedObjects::iterator fobj, fobjEnd = folded_objs.end();
101   for (fobj = folded_objs.begin(); fobj != fobjEnd; ++fobj) {
102     LDContext::sym_iterator sym, symEnd = (*fobj)->context()->symTabEnd();
103     for (sym = (*fobj)->context()->symTabBegin(); sym != symEnd; ++sym) {
104       if ((*sym)->hasFragRef() && ((*sym)->type() == ResolveInfo::Function)) {
105         LDSymbol* out_sym = (*sym)->resolveInfo()->outSymbol();
106         FragmentRef* frag_ref = out_sym->fragRef();
107         LDSection* sect = &(frag_ref->frag()->getParent()->getSection());
108         if (sect->kind() == LDFileFormat::Folded) {
109           size_t kept_index = m_KeptSections[sect].second;
110           LDSection* kept_sect = (*(m_KeptSections.begin() + kept_index)).first;
111           frag_ref->assign(kept_sect->getSectionData()->front(),
112                            frag_ref->offset());
113         }
114       }
115     }  // for each symbol
116   }    // for each folded object
117 }
118 
findCandidates(FoldingCandidates & pCandidateList)119 void IdenticalCodeFolding::findCandidates(FoldingCandidates& pCandidateList) {
120   Module::obj_iterator obj, objEnd = m_Module.obj_end();
121   for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
122     std::set<const LDSection*> funcptr_access_set;
123     typedef std::map<LDSection*, LDSection*> CandidateMap;
124     CandidateMap candidate_map;
125     LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
126     for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
127       switch ((*sect)->kind()) {
128         case LDFileFormat::TEXT: {
129           candidate_map.insert(
130               std::make_pair(*sect, reinterpret_cast<LDSection*>(NULL)));
131           break;
132         }
133         case LDFileFormat::Relocation: {
134           LDSection* target = (*sect)->getLink();
135           if (target->kind() == LDFileFormat::TEXT) {
136             candidate_map[target] = *sect;
137           }
138 
139           // Safe icf
140           if (m_Config.options().getICFMode() == GeneralOptions::ICF::Safe) {
141             RelocData::iterator rel, relEnd = (*sect)->getRelocData()->end();
142             for (rel = (*sect)->getRelocData()->begin(); rel != relEnd; ++rel) {
143               LDSymbol* sym = rel->symInfo()->outSymbol();
144               if (sym->hasFragRef() && (sym->type() == ResolveInfo::Function)) {
145                 const LDSection* def =
146                     &sym->fragRef()->frag()->getParent()->getSection();
147                 if (!isSymCtorOrDtor(*rel->symInfo()) &&
148                     m_Backend.mayHaveUnsafeFunctionPointerAccess(*target) &&
149                     m_Backend.getRelocator()
150                         ->mayHaveFunctionPointerAccess(*rel)) {
151                   funcptr_access_set.insert(def);
152                 }
153               }
154             }  // for each reloc
155           }
156 
157           break;
158         }
159         default: {
160           // skip
161           break;
162         }
163       }  // end of switch
164     }    // for each section
165 
166     CandidateMap::iterator candidate, candidateEnd = candidate_map.end();
167     for (candidate = candidate_map.begin(); candidate != candidateEnd;
168          ++candidate) {
169       if ((m_Config.options().getICFMode() == GeneralOptions::ICF::All) ||
170           (funcptr_access_set.count(candidate->first) == 0)) {
171         size_t index = m_KeptSections.size();
172         m_KeptSections[candidate->first] = ObjectAndId(*obj, index);
173         pCandidateList.push_back(
174             FoldingCandidate(candidate->first, candidate->second, *obj));
175       }
176     }  // for each possible candidate
177   }  // for each obj
178 }
179 
matchCandidates(FoldingCandidates & pCandidateList)180 bool IdenticalCodeFolding::matchCandidates(FoldingCandidates& pCandidateList) {
181   typedef std::multimap<uint32_t, size_t> ChecksumMap;
182   ChecksumMap checksum_map;
183   std::vector<std::string> contents(pCandidateList.size());
184   bool converged = true;
185 
186   for (size_t index = 0; index < pCandidateList.size(); ++index) {
187     contents[index] = pCandidateList[index].getContentWithVariables(
188         m_Backend, m_KeptSections);
189     uint32_t checksum = ::crc32(0xFFFFFFFF,
190                                 (const uint8_t*)contents[index].c_str(),
191                                 contents[index].length());
192 
193     size_t count = checksum_map.count(checksum);
194     if (count == 0) {
195       checksum_map.insert(std::make_pair(checksum, index));
196     } else {
197       std::pair<ChecksumMap::iterator, ChecksumMap::iterator> ret =
198           checksum_map.equal_range(checksum);
199       for (ChecksumMap::iterator it = ret.first; it != ret.second; ++it) {
200         size_t kept_index = (*it).second;
201         if (contents[index].compare(contents[kept_index]) == 0) {
202           m_KeptSections[pCandidateList[index].sect].second = kept_index;
203           converged = false;
204           break;
205         }
206       }
207     }
208   }
209 
210   return converged;
211 }
212 
initConstantContent(const TargetLDBackend & pBackend,const IdenticalCodeFolding::KeptSections & pKeptSections)213 void IdenticalCodeFolding::FoldingCandidate::initConstantContent(
214     const TargetLDBackend& pBackend,
215     const IdenticalCodeFolding::KeptSections& pKeptSections) {
216   // Get the static content from text.
217   assert(sect != NULL && sect->hasSectionData());
218   SectionData::const_iterator frag, fragEnd = sect->getSectionData()->end();
219   for (frag = sect->getSectionData()->begin(); frag != fragEnd; ++frag) {
220     switch (frag->getKind()) {
221       case Fragment::Region: {
222         const RegionFragment& region = llvm::cast<RegionFragment>(*frag);
223         content.append(region.getRegion().begin(), region.size());
224         break;
225       }
226       default: {
227         // FIXME: Currently we only take care of RegionFragment.
228         break;
229       }
230     }
231   }
232 
233   // Get the static content from relocs.
234   if (reloc_sect != NULL && reloc_sect->hasRelocData()) {
235     RelocData::iterator rel, relEnd = reloc_sect->getRelocData()->end();
236     for (rel = reloc_sect->getRelocData()->begin(); rel != relEnd; ++rel) {
237       llvm::format_object<Relocation::Type,
238                           Relocation::Address,
239                           Relocation::Address,
240                           Relocation::Address> rel_info("%x%llx%llx%llx",
241                                                         rel->type(),
242                                                         rel->symValue(),
243                                                         rel->addend(),
244                                                         rel->place());
245       char rel_str[48];
246       rel_info.print(rel_str, sizeof(rel_str));
247       content.append(rel_str);
248 
249       // Handle the recursive call.
250       LDSymbol* sym = rel->symInfo()->outSymbol();
251       if ((sym->type() == ResolveInfo::Function) && sym->hasFragRef()) {
252         LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
253         if (def == sect) {
254           continue;
255         }
256       }
257 
258       if (!pBackend.isSymbolPreemptible(*rel->symInfo()) && sym->hasFragRef() &&
259           (pKeptSections.find(
260                &sym->fragRef()->frag()->getParent()->getSection()) !=
261            pKeptSections.end())) {
262         // Mark this reloc as a variable.
263         variable_relocs.push_back(rel);
264       } else {
265         // TODO: Support inlining merge sections if possible (target-dependent).
266         if ((sym->binding() == ResolveInfo::Local) ||
267             (sym->binding() == ResolveInfo::Absolute)) {
268           // ABS or Local symbols.
269           content.append(sym->name()).append(obj->name()).append(
270               obj->path().native());
271         } else {
272           content.append(sym->name());
273         }
274       }
275     }
276   }
277 }
278 
getContentWithVariables(const TargetLDBackend & pBackend,const IdenticalCodeFolding::KeptSections & pKeptSections)279 std::string IdenticalCodeFolding::FoldingCandidate::getContentWithVariables(
280     const TargetLDBackend& pBackend,
281     const IdenticalCodeFolding::KeptSections& pKeptSections) {
282   std::string result(content);
283   // Compute the variable content from relocs.
284   std::vector<Relocation*>::const_iterator rel, relEnd = variable_relocs.end();
285   for (rel = variable_relocs.begin(); rel != relEnd; ++rel) {
286     LDSymbol* sym = (*rel)->symInfo()->outSymbol();
287     LDSection* def = &sym->fragRef()->frag()->getParent()->getSection();
288     // Use the kept section index.
289     KeptSections::const_iterator it = pKeptSections.find(def);
290     llvm::format_object<size_t> kept_info("%x", (*it).second.second);
291     char kept_str[8];
292     kept_info.print(kept_str, sizeof(kept_str));
293     result.append(kept_str);
294   }
295 
296   return result;
297 }
298 
299 }  // namespace mcld
300