1 //===- GarbageCollection.cpp ----------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/Fragment/Fragment.h>
10 #include <mcld/Fragment/Relocation.h>
11 #include <mcld/LD/GarbageCollection.h>
12 #include <mcld/LD/LDContext.h>
13 #include <mcld/LD/LDFileFormat.h>
14 #include <mcld/LD/LDSection.h>
15 #include <mcld/LD/LDSymbol.h>
16 #include <mcld/LD/SectionData.h>
17 #include <mcld/LD/RelocData.h>
18 #include <mcld/LinkerConfig.h>
19 #include <mcld/LinkerScript.h>
20 #include <mcld/Module.h>
21 #include <mcld/Support/MsgHandling.h>
22 #include <mcld/Target/TargetLDBackend.h>
23 
24 #include <llvm/Support/Casting.h>
25 
26 #include <queue>
27 #if !defined(MCLD_ON_WIN32)
28 #include <fnmatch.h>
29 #define fnmatch0(pattern,string) (fnmatch(pattern,string,0) == 0)
30 #else
31 #include <windows.h>
32 #include <shlwapi.h>
33 #define fnmatch0(pattern,string) (PathMatchSpec(string, pattern) == true)
34 #endif
35 
36 using namespace mcld;
37 
38 //===----------------------------------------------------------------------===//
39 // Non-member functions
40 //===----------------------------------------------------------------------===//
41 // FIXME: these rules should be added into SectionMap, while currently adding to
42 // SectionMap will cause the output order change in .text section and leads to
43 // the .ARM.exidx order incorrect. We should sort the .ARM.exidx.
44 static const char* pattern_to_keep[] =
45 {
46   ".text*personality*",
47   ".data*personality*",
48   ".gnu.linkonce.d*personality*",
49   ".sdata*personality*"
50 };
51 
52 /// shouldKeep - check the section name for the keep sections
shouldKeep(const std::string & pName)53 static bool shouldKeep(const std::string& pName)
54 {
55   static const unsigned int pattern_size =
56                            sizeof(pattern_to_keep) / sizeof(pattern_to_keep[0]);
57   for (unsigned int i=0; i < pattern_size; ++i) {
58     if (fnmatch0(pattern_to_keep[i], pName.c_str()))
59       return true;
60   }
61   return false;
62 }
63 
64 /// shouldProcessGC - check if the section kind is handled in GC
mayProcessGC(const LDSection & pSection)65 static bool mayProcessGC(const LDSection& pSection)
66 {
67   if (pSection.kind() == LDFileFormat::TEXT ||
68       pSection.kind() == LDFileFormat::DATA ||
69       pSection.kind() == LDFileFormat::BSS ||
70       pSection.kind() == LDFileFormat::GCCExceptTable)
71     return true;
72   return false;
73 }
74 
75 //===----------------------------------------------------------------------===//
76 // GarbageCollection::SectionReachedListMap
77 //===----------------------------------------------------------------------===//
78 void
addReference(const LDSection & pFrom,const LDSection & pTo)79 GarbageCollection::SectionReachedListMap::addReference(const LDSection& pFrom,
80                                                        const LDSection& pTo)
81 {
82   m_ReachedSections[&pFrom].insert(&pTo);
83 }
84 
85 GarbageCollection::SectionListTy&
getReachedList(const LDSection & pSection)86 GarbageCollection::SectionReachedListMap::getReachedList(
87                                                       const LDSection& pSection)
88 {
89   return m_ReachedSections[&pSection];
90 }
91 
92 GarbageCollection::SectionListTy*
findReachedList(const LDSection & pSection)93 GarbageCollection::SectionReachedListMap::findReachedList(
94                                                       const LDSection& pSection)
95 {
96   ReachedSectionsTy::iterator it = m_ReachedSections.find(&pSection);
97   if (it == m_ReachedSections.end())
98     return NULL;
99   return &it->second;
100 }
101 
102 //===----------------------------------------------------------------------===//
103 // GarbageCollection
104 //===----------------------------------------------------------------------===//
GarbageCollection(const LinkerConfig & pConfig,const TargetLDBackend & pBackend,Module & pModule)105 GarbageCollection::GarbageCollection(const LinkerConfig& pConfig,
106                                      const TargetLDBackend& pBackend,
107                                      Module& pModule)
108   : m_Config(pConfig), m_Backend(pBackend), m_Module(pModule)
109 {
110 }
111 
~GarbageCollection()112 GarbageCollection::~GarbageCollection()
113 {
114 }
115 
run()116 bool GarbageCollection::run()
117 {
118   // 1. traverse all the relocations to set up the reached sections of each
119   // section
120   setUpReachedSections();
121   m_Backend.setUpReachedSectionsForGC(m_Module, m_SectionReachedListMap);
122 
123   // 2. get all sections defined the entry point
124   SectionVecTy entry;
125   getEntrySections(entry);
126 
127   // 3. find all the referenced sections those can be reached by entry
128   findReferencedSections(entry);
129 
130   // 4. stripSections - set the unreached sections to Ignore
131   stripSections();
132   return true;
133 }
134 
setUpReachedSections()135 void GarbageCollection::setUpReachedSections()
136 {
137   // traverse all the input relocations to setup the reached sections
138   Module::obj_iterator input, inEnd = m_Module.obj_end();
139   for (input = m_Module.obj_begin(); input != inEnd; ++input) {
140     LDContext::sect_iterator rs, rsEnd = (*input)->context()->relocSectEnd();
141     for (rs = (*input)->context()->relocSectBegin(); rs != rsEnd; ++rs) {
142       // bypass the discarded relocation section
143       // 1. its section kind is changed to Ignore. (The target section is a
144       // discarded group section.)
145       // 2. it has no reloc data. (All symbols in the input relocs are in the
146       // discarded group sections)
147       LDSection* reloc_sect = *rs;
148       LDSection* apply_sect = reloc_sect->getLink();
149       if ((LDFileFormat::Ignore == reloc_sect->kind()) ||
150           (!reloc_sect->hasRelocData()))
151         continue;
152 
153       // bypass the apply target sections which are not handled by gc
154       if (!mayProcessGC(*apply_sect))
155         continue;
156 
157       bool add_first = false;
158       SectionListTy* reached_sects = NULL;
159       RelocData::iterator reloc_it, rEnd = reloc_sect->getRelocData()->end();
160       for (reloc_it = reloc_sect->getRelocData()->begin(); reloc_it != rEnd;
161                                                                    ++reloc_it) {
162         Relocation* reloc = llvm::cast<Relocation>(reloc_it);
163         ResolveInfo* sym = reloc->symInfo();
164         // only the target symbols defined in the input fragments can make the
165         // reference
166         if (NULL == sym)
167           continue;
168         if (!sym->isDefine() || !sym->outSymbol()->hasFragRef())
169           continue;
170 
171         // only the target symbols defined in the concerned sections can make
172         // the reference
173         const LDSection* target_sect =
174                 &sym->outSymbol()->fragRef()->frag()->getParent()->getSection();
175         if (!mayProcessGC(*target_sect))
176           continue;
177 
178         // setup the reached list, if we first add the element to reached list
179         // of this section, create an entry in ReachedSections map
180         if (!add_first) {
181           reached_sects = &m_SectionReachedListMap.getReachedList(*apply_sect);
182           add_first = true;
183         }
184         reached_sects->insert(target_sect);
185       }
186       reached_sects = NULL;
187       add_first = false;
188     }
189   }
190 }
191 
getEntrySections(SectionVecTy & pEntry)192 void GarbageCollection::getEntrySections(SectionVecTy& pEntry)
193 {
194   // all the KEEP sections defined in ldscript are entries, traverse all the
195   // input sections and check the SectionMap to find the KEEP sections
196   Module::obj_iterator obj, objEnd = m_Module.obj_end();
197   SectionMap& sect_map = m_Module.getScript().sectionMap();
198   for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
199     const std::string input_name = (*obj)->name();
200     LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
201     for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
202       LDSection* section = *sect;
203       if (!mayProcessGC(*section))
204         continue;
205 
206       SectionMap::Input* sm_input =
207                               sect_map.find(input_name, section->name()).second;
208       if (((sm_input != NULL) && (InputSectDesc::Keep == sm_input->policy())) ||
209           shouldKeep(section->name()))
210         pEntry.push_back(section);
211     }
212   }
213 
214   // get the sections those the entry symbols defined in
215   if (LinkerConfig::Exec == m_Config.codeGenType() ||
216                                                    m_Config.options().isPIE()) {
217     // when building executable
218     // 1. the entry symbol is the entry
219     LDSymbol* entry_sym =
220                 m_Module.getNamePool().findSymbol(m_Backend.getEntry(m_Module));
221     assert(NULL != entry_sym);
222     pEntry.push_back(&entry_sym->fragRef()->frag()->getParent()->getSection());
223 
224     // 2. the symbols have been seen in dynamice objects are entries
225     NamePool::syminfo_iterator info_it,
226                                 info_end = m_Module.getNamePool().syminfo_end();
227     for (info_it = m_Module.getNamePool().syminfo_begin(); info_it != info_end;
228                                                                     ++info_it) {
229       ResolveInfo* info = info_it.getEntry();
230       if (!info->isDefine() || info->isLocal())
231         continue;
232 
233       if (!info->isInDyn())
234         continue;
235 
236       LDSymbol* sym = info->outSymbol();
237       if (NULL == sym || !sym->hasFragRef())
238         continue;
239 
240       // only the target symbols defined in the concerned sections can be
241       // entries
242       const LDSection* sect =
243                              &sym->fragRef()->frag()->getParent()->getSection();
244       if (!mayProcessGC(*sect))
245         continue;
246 
247       pEntry.push_back(sect);
248     }
249   }
250 
251   else {
252     // when building shared objects, the global define symbols are entries
253     NamePool::syminfo_iterator info_it,
254                                 info_end = m_Module.getNamePool().syminfo_end();
255     for (info_it = m_Module.getNamePool().syminfo_begin(); info_it != info_end;
256                                                                     ++info_it) {
257       ResolveInfo* info = info_it.getEntry();
258       if (!info->isDefine() ||
259           info->isLocal()   ||
260           info->shouldForceLocal(m_Config))
261         continue;
262       LDSymbol* sym = info->outSymbol();
263       if (NULL == sym || !sym->hasFragRef())
264         continue;
265 
266       // only the target symbols defined in the concerned sections can be
267       // entries
268       const LDSection* sect =
269                              &sym->fragRef()->frag()->getParent()->getSection();
270       if (!mayProcessGC(*sect))
271         continue;
272       pEntry.push_back(sect);
273     }
274   }
275 
276   // symbols set by -u should not be garbage collected. Set them entries.
277   GeneralOptions::const_undef_sym_iterator usym;
278   GeneralOptions::const_undef_sym_iterator usymEnd =
279                                              m_Config.options().undef_sym_end();
280   for (usym = m_Config.options().undef_sym_begin(); usym != usymEnd; ++usym) {
281     LDSymbol* sym = m_Module.getNamePool().findSymbol(*usym);
282     assert(sym);
283     ResolveInfo* info = sym->resolveInfo();
284     assert(info);
285     if (!info->isDefine() || !sym->hasFragRef())
286       continue;
287     // only the symbols defined in the concerned sections can be entries
288     const LDSection* sect = &sym->fragRef()->frag()->getParent()->getSection();
289     if (!mayProcessGC(*sect))
290       continue;
291     pEntry.push_back(sect);
292   }
293 }
294 
findReferencedSections(SectionVecTy & pEntry)295 void GarbageCollection::findReferencedSections(SectionVecTy& pEntry)
296 {
297   // list of sections waiting to be processed
298   typedef std::queue<const LDSection*> WorkListTy;
299   WorkListTy work_list;
300   // start from each entry, resolve the transitive closure
301   SectionVecTy::iterator entry_it, entry_end = pEntry.end();
302   for (entry_it = pEntry.begin(); entry_it != entry_end; ++entry_it) {
303     // add entry point to work list
304     work_list.push(*entry_it);
305 
306     // add section from the work_list to the referencedSections until every
307     // reached sections are added
308     while (!work_list.empty()) {
309       const LDSection* sect = work_list.front();
310       work_list.pop();
311       // add section to the ReferencedSections, if the section has been put into
312       // referencedSections, skip this section
313       if (!m_ReferencedSections.insert(sect).second)
314         continue;
315 
316       // get the section reached list, if the section do not has one, which
317       // means no referenced between it and other sections, then skip it
318       SectionListTy* reach_list =
319                                  m_SectionReachedListMap.findReachedList(*sect);
320       if (NULL == reach_list)
321         continue;
322 
323       // put the reached sections to work list, skip the one already be in
324       // referencedSections
325       SectionListTy::iterator it, end = reach_list->end();
326       for (it = reach_list->begin(); it != end; ++it) {
327         if (m_ReferencedSections.find(*it) == m_ReferencedSections.end())
328           work_list.push(*it);
329       }
330     }
331   }
332 }
333 
stripSections()334 void GarbageCollection::stripSections()
335 {
336   // Traverse all the input Regular and BSS sections, if a section is not found
337   // in the ReferencedSections, then it should be garbage collected
338   Module::obj_iterator obj, objEnd = m_Module.obj_end();
339   for (obj = m_Module.obj_begin(); obj != objEnd; ++obj) {
340     LDContext::sect_iterator sect, sectEnd = (*obj)->context()->sectEnd();
341     for (sect = (*obj)->context()->sectBegin(); sect != sectEnd; ++sect) {
342       LDSection* section = *sect;
343       if (!mayProcessGC(*section))
344         continue;
345 
346       if (m_ReferencedSections.find(section) == m_ReferencedSections.end()) {
347         section->setKind(LDFileFormat::Ignore);
348         debug(diag::debug_print_gc_sections) << section->name()
349                                              << (*obj)->name();
350       }
351     }
352   }
353 
354   // Traverse all the relocation sections, if its target section is set to
355   // Ignore, then set the relocation section to Ignore as well
356   Module::obj_iterator input, inEnd = m_Module.obj_end();
357   for (input = m_Module.obj_begin(); input != inEnd; ++input) {
358     LDContext::sect_iterator rs, rsEnd = (*input)->context()->relocSectEnd();
359     for (rs = (*input)->context()->relocSectBegin(); rs != rsEnd; ++rs) {
360       LDSection* reloc_sect = *rs;
361       if (LDFileFormat::Ignore == reloc_sect->getLink()->kind())
362         reloc_sect->setKind(LDFileFormat::Ignore);
363     }
364   }
365 }
366 
367