1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/LD/GNUArchiveReader.h>
10 
11 #include <mcld/Module.h>
12 #include <mcld/InputTree.h>
13 #include <mcld/LinkerConfig.h>
14 #include <mcld/MC/Attribute.h>
15 #include <mcld/MC/Input.h>
16 #include <mcld/LD/ResolveInfo.h>
17 #include <mcld/LD/ELFObjectReader.h>
18 #include <mcld/Support/FileSystem.h>
19 #include <mcld/Support/FileHandle.h>
20 #include <mcld/Support/MemoryArea.h>
21 #include <mcld/Support/MsgHandling.h>
22 #include <mcld/Support/Path.h>
23 #include <mcld/ADT/SizeTraits.h>
24 
25 #include <llvm/ADT/StringRef.h>
26 #include <llvm/Support/Host.h>
27 
28 #include <cstring>
29 #include <cstdlib>
30 
31 using namespace mcld;
32 
GNUArchiveReader(Module & pModule,ELFObjectReader & pELFObjectReader)33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
34                                    ELFObjectReader& pELFObjectReader)
35  : m_Module(pModule),
36    m_ELFObjectReader(pELFObjectReader)
37 {
38 }
39 
~GNUArchiveReader()40 GNUArchiveReader::~GNUArchiveReader()
41 {
42 }
43 
44 /// isMyFormat
isMyFormat(Input & pInput,bool & pContinue) const45 bool GNUArchiveReader::isMyFormat(Input& pInput, bool &pContinue) const
46 {
47   assert(pInput.hasMemArea());
48   if (pInput.memArea()->size() < Archive::MAGIC_LEN)
49     return false;
50 
51   llvm::StringRef region =
52       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
53   const char* str = region.begin();
54 
55   bool result = false;
56   assert(NULL != str);
57   pContinue = true;
58   if (isArchive(str) || isThinArchive(str))
59     result = true;
60 
61   return result;
62 }
63 
64 /// isArchive
isArchive(const char * pStr) const65 bool GNUArchiveReader::isArchive(const char* pStr) const
66 {
67   return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
68 }
69 
70 /// isThinArchive
isThinArchive(const char * pStr) const71 bool GNUArchiveReader::isThinArchive(const char* pStr) const
72 {
73   return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
74 }
75 
76 /// isThinArchive
isThinArchive(Input & pInput) const77 bool GNUArchiveReader::isThinArchive(Input& pInput) const
78 {
79   assert(pInput.hasMemArea());
80   llvm::StringRef region =
81       pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
82   const char* str = region.begin();
83 
84   bool result = false;
85   assert(NULL != str);
86   if (isThinArchive(str))
87     result = true;
88 
89   return result;
90 }
91 
readArchive(const LinkerConfig & pConfig,Archive & pArchive)92 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
93                                    Archive& pArchive)
94 {
95   // bypass the empty archive
96   if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
97     return true;
98 
99   if (pArchive.getARFile().attribute()->isWholeArchive())
100     return includeAllMembers(pConfig, pArchive);
101 
102   // if this is the first time read this archive, setup symtab and strtab
103   if (pArchive.getSymbolTable().empty()) {
104   // read the symtab of the archive
105   readSymbolTable(pArchive);
106 
107   // read the strtab of the archive
108   readStringTable(pArchive);
109 
110   // add root archive to ArchiveMemberMap
111   pArchive.addArchiveMember(pArchive.getARFile().name(),
112                             pArchive.inputs().root(),
113                             &InputTree::Downward);
114   }
115 
116   // include the needed members in the archive and build up the input tree
117   bool willSymResolved;
118   do {
119     willSymResolved = false;
120     for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
121       // bypass if we already decided to include this symbol or not
122       if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
123         continue;
124 
125       // bypass if another symbol with the same object file offset is included
126       if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
127         pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
128         continue;
129       }
130 
131       // check if we should include this defined symbol
132       Archive::Symbol::Status status =
133         shouldIncludeSymbol(pArchive.getSymbolName(idx));
134       if (Archive::Symbol::Unknown != status)
135         pArchive.setSymbolStatus(idx, status);
136 
137       if (Archive::Symbol::Include == status) {
138         // include the object member from the given offset
139         includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
140         willSymResolved = true;
141       } // end of if
142     } // end of for
143   } while (willSymResolved);
144 
145   return true;
146 }
147 
148 /// readMemberHeader - read the header of a member in a archive file and then
149 /// return the corresponding archive member (it may be an input object or
150 /// another archive)
151 /// @param pArchiveRoot  - the archive root that holds the strtab (extended
152 ///                        name table)
153 /// @param pArchiveFile  - the archive that contains the needed object
154 /// @param pFileOffset   - file offset of the member header in the archive
155 /// @param pNestedOffset - used when we find a nested archive
156 /// @param pMemberSize   - the file size of this member
readMemberHeader(Archive & pArchiveRoot,Input & pArchiveFile,uint32_t pFileOffset,uint32_t & pNestedOffset,size_t & pMemberSize)157 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
158                                           Input& pArchiveFile,
159                                           uint32_t pFileOffset,
160                                           uint32_t& pNestedOffset,
161                                           size_t& pMemberSize)
162 {
163   assert(pArchiveFile.hasMemArea());
164 
165   llvm::StringRef header_region =
166     pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
167                                     sizeof(Archive::MemberHeader));
168   const Archive::MemberHeader* header =
169     reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
170 
171   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
172 
173   pMemberSize = atoi(header->size);
174 
175   // parse the member name and nested offset if any
176   std::string member_name;
177   llvm::StringRef name_field(header->name, sizeof(header->name));
178   if ('/' != header->name[0]) {
179     // this is an object file in an archive
180     size_t pos = name_field.find_first_of('/');
181     member_name.assign(name_field.substr(0, pos).str());
182   }
183   else {
184     // this is an object/archive file in a thin archive
185     size_t begin = 1;
186     size_t end = name_field.find_first_of(" :");
187     uint32_t name_offset = 0;
188     // parse the name offset
189     name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
190 
191     if (':' == name_field[end]) {
192       // there is a nested offset
193       begin = end + 1;
194       end = name_field.find_first_of(' ', begin);
195       name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
196     }
197 
198     // get the member name from the extended name table
199     assert(pArchiveRoot.hasStrTable());
200     begin = name_offset;
201     end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
202     member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
203   }
204 
205   Input* member = NULL;
206   bool isThinAR = isThinArchive(pArchiveFile);
207   if (!isThinAR) {
208     // this is an object file in an archive
209     member = pArchiveRoot.getMemberFile(pArchiveFile,
210                                         isThinAR,
211                                         member_name,
212                                         pArchiveFile.path(),
213                                         (pFileOffset +
214                                          sizeof(Archive::MemberHeader)));
215   }
216   else {
217     // this is a member in a thin archive
218     // try to find if this is a archive already in the map first
219     Archive::ArchiveMember* ar_member =
220       pArchiveRoot.getArchiveMember(member_name);
221     if (NULL != ar_member) {
222       return ar_member->file;
223     }
224 
225     // get nested file path, the nested file's member name is the relative
226     // path to the archive containing it.
227     sys::fs::Path input_path(pArchiveFile.path().parent_path());
228     if (!input_path.empty())
229       input_path.append(member_name);
230     else
231       input_path.assign(member_name);
232 
233     member = pArchiveRoot.getMemberFile(pArchiveFile,
234                                         isThinAR,
235                                         member_name,
236                                         input_path);
237   }
238 
239   return member;
240 }
241 
242 template <size_t SIZE>
readSymbolTableEntries(Archive & pArchive,llvm::StringRef pMemRegion)243 static void readSymbolTableEntries(Archive& pArchive, llvm::StringRef pMemRegion)
244 {
245   typedef typename SizeTraits<SIZE>::Offset Offset;
246 
247   const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
248 
249   // read the number of symbols
250   Offset number = 0;
251   if (llvm::sys::IsLittleEndianHost)
252     number = mcld::bswap<SIZE>(*data);
253   else
254     number = *data;
255 
256   // set up the pointers for file offset and name offset
257   ++data;
258   const char* name = reinterpret_cast<const char*>(data + number);
259 
260   // add the archive symbols
261   for (Offset i = 0; i < number; ++i) {
262     if (llvm::sys::IsLittleEndianHost)
263       pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
264     else
265       pArchive.addSymbol(name, *data);
266     name += strlen(name) + 1;
267     ++data;
268   }
269 }
270 
271 /// readSymbolTable - read the archive symbol map (armap)
readSymbolTable(Archive & pArchive)272 bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
273 {
274   assert(pArchive.getARFile().hasMemArea());
275 
276   llvm::StringRef header_region =
277     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
278                                              Archive::MAGIC_LEN),
279                                             sizeof(Archive::MemberHeader));
280   const Archive::MemberHeader* header =
281     reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
282   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
283 
284   int symtab_size = atoi(header->size);
285   pArchive.setSymTabSize(symtab_size);
286 
287   if (!pArchive.getARFile().attribute()->isWholeArchive()) {
288     llvm::StringRef symtab_region = pArchive.getARFile().memArea()->request(
289         (pArchive.getARFile().fileOffset() +
290          Archive::MAGIC_LEN +
291          sizeof(Archive::MemberHeader)),
292         symtab_size);
293 
294     if (0 == strncmp(header->name, Archive::SVR4_SYMTAB_NAME,
295                                    strlen(Archive::SVR4_SYMTAB_NAME)))
296       readSymbolTableEntries<32>(pArchive, symtab_region);
297     else if (0 == strncmp(header->name, Archive::IRIX6_SYMTAB_NAME,
298                                         strlen(Archive::IRIX6_SYMTAB_NAME)))
299       readSymbolTableEntries<64>(pArchive, symtab_region);
300     else
301       unreachable(diag::err_unsupported_archive);
302 
303   }
304   return true;
305 }
306 
307 /// readStringTable - read the strtab for long file name of the archive
readStringTable(Archive & pArchive)308 bool GNUArchiveReader::readStringTable(Archive& pArchive)
309 {
310   size_t offset = Archive::MAGIC_LEN +
311                   sizeof(Archive::MemberHeader) +
312                   pArchive.getSymTabSize();
313 
314   if (0x0 != (offset & 1))
315     ++offset;
316 
317   assert(pArchive.getARFile().hasMemArea());
318 
319   llvm::StringRef header_region =
320     pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
321                                              offset),
322                                             sizeof(Archive::MemberHeader));
323   const Archive::MemberHeader* header =
324     reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
325 
326   assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
327 
328   if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
329     // read the extended name table
330     int strtab_size = atoi(header->size);
331     llvm::StringRef strtab_region =
332       pArchive.getARFile().memArea()->request(
333                                    (pArchive.getARFile().fileOffset() +
334                                     offset + sizeof(Archive::MemberHeader)),
335                                    strtab_size);
336     const char* strtab = strtab_region.begin();
337     pArchive.getStrTable().assign(strtab, strtab_size);
338   }
339   return true;
340 }
341 
342 /// shouldIncludeStatus - given a sym name from armap and check if including
343 /// the corresponding archive member, and then return the decision
344 enum Archive::Symbol::Status
shouldIncludeSymbol(const llvm::StringRef & pSymName) const345 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
346 {
347   // TODO: handle symbol version issue and user defined symbols
348   const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
349   if (NULL != info) {
350     if (!info->isUndef())
351       return Archive::Symbol::Exclude;
352     if (info->isWeak())
353       return Archive::Symbol::Unknown;
354     return Archive::Symbol::Include;
355   }
356   return Archive::Symbol::Unknown;
357 }
358 
359 /// includeMember - include the object member in the given file offset, and
360 /// return the size of the object
361 /// @param pConfig - LinkerConfig
362 /// @param pArchiveRoot - the archive root
363 /// @param pFileOffset  - file offset of the member header in the archive
includeMember(const LinkerConfig & pConfig,Archive & pArchive,uint32_t pFileOffset)364 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
365                                        Archive& pArchive,
366                                        uint32_t pFileOffset)
367 {
368   Input* cur_archive = &(pArchive.getARFile());
369   Input* member = NULL;
370   uint32_t file_offset = pFileOffset;
371   size_t size = 0;
372   do {
373     uint32_t nested_offset = 0;
374     // use the file offset in current archive to find out the member we
375     // want to include
376     member = readMemberHeader(pArchive,
377                               *cur_archive,
378                               file_offset,
379                               nested_offset,
380                               size);
381     assert(member != NULL);
382     // bypass if we get an archive that is already in the map
383     if (Input::Archive == member->type()) {
384         cur_archive = member;
385         file_offset = nested_offset;
386         continue;
387     }
388 
389     // insert a node into the subtree of current archive.
390     Archive::ArchiveMember* parent =
391       pArchive.getArchiveMember(cur_archive->name());
392 
393     assert(NULL != parent);
394     pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
395 
396     // move the iterator to new created node, and also adjust the
397     // direction to Afterward for next insertion in this subtree
398     parent->move->move(parent->lastPos);
399     parent->move = &InputTree::Afterward;
400     bool doContinue = false;
401 
402     if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
403       member->setType(Input::Object);
404       // Set this object as no export if the archive is in the exclude libs.
405       if (pArchive.getARFile().noExport()) {
406         member->setNoExport();
407       }
408       pArchive.addObjectMember(pFileOffset, parent->lastPos);
409       m_ELFObjectReader.readHeader(*member);
410       m_ELFObjectReader.readSections(*member);
411       m_ELFObjectReader.readSymbols(*member);
412       m_Module.getObjectList().push_back(member);
413     }
414     else if (doContinue && isMyFormat(*member, doContinue)) {
415       member->setType(Input::Archive);
416       // when adding a new archive node, set the iterator to archive
417       // itself, and set the direction to Downward
418       pArchive.addArchiveMember(member->name(),
419                                 parent->lastPos,
420                                 &InputTree::Downward);
421       cur_archive = member;
422       file_offset = nested_offset;
423     }
424     else {
425       warning(diag::warn_unrecognized_input_file) << member->path()
426         << pConfig.targets().triple().str();
427     }
428   } while (Input::Object != member->type());
429   return size;
430 }
431 
432 /// includeAllMembers - include all object members. This is called if
433 /// --whole-archive is the attribute for this archive file.
includeAllMembers(const LinkerConfig & pConfig,Archive & pArchive)434 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
435                                          Archive& pArchive)
436 {
437   // read the symtab of the archive
438   readSymbolTable(pArchive);
439 
440   // read the strtab of the archive
441   readStringTable(pArchive);
442 
443   // add root archive to ArchiveMemberMap
444   pArchive.addArchiveMember(pArchive.getARFile().name(),
445                             pArchive.inputs().root(),
446                             &InputTree::Downward);
447 
448   bool isThinAR = isThinArchive(pArchive.getARFile());
449   uint32_t begin_offset = pArchive.getARFile().fileOffset() +
450                           Archive::MAGIC_LEN +
451                           sizeof(Archive::MemberHeader) +
452                           pArchive.getSymTabSize();
453   if (pArchive.hasStrTable()) {
454     if (0x0 != (begin_offset & 1))
455       ++begin_offset;
456     begin_offset += sizeof(Archive::MemberHeader) +
457                     pArchive.getStrTable().size();
458   }
459   uint32_t end_offset = pArchive.getARFile().memArea()->size();
460   for (uint32_t offset = begin_offset;
461        offset < end_offset;
462        offset += sizeof(Archive::MemberHeader)) {
463 
464     size_t size = includeMember(pConfig, pArchive, offset);
465 
466     if (!isThinAR) {
467       offset += size;
468     }
469 
470     if (0x0 != (offset & 1))
471       ++offset;
472   }
473   return true;
474 }
475