1 //===- GNUArchiveReader.cpp -----------------------------------------------===//
2 //
3 // The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include <mcld/LD/GNUArchiveReader.h>
10
11 #include <mcld/Module.h>
12 #include <mcld/InputTree.h>
13 #include <mcld/LinkerConfig.h>
14 #include <mcld/MC/Attribute.h>
15 #include <mcld/MC/Input.h>
16 #include <mcld/LD/ResolveInfo.h>
17 #include <mcld/LD/ELFObjectReader.h>
18 #include <mcld/Support/FileSystem.h>
19 #include <mcld/Support/FileHandle.h>
20 #include <mcld/Support/MemoryArea.h>
21 #include <mcld/Support/MsgHandling.h>
22 #include <mcld/Support/Path.h>
23 #include <mcld/ADT/SizeTraits.h>
24
25 #include <llvm/ADT/StringRef.h>
26 #include <llvm/Support/Host.h>
27
28 #include <cstring>
29 #include <cstdlib>
30
31 using namespace mcld;
32
GNUArchiveReader(Module & pModule,ELFObjectReader & pELFObjectReader)33 GNUArchiveReader::GNUArchiveReader(Module& pModule,
34 ELFObjectReader& pELFObjectReader)
35 : m_Module(pModule),
36 m_ELFObjectReader(pELFObjectReader)
37 {
38 }
39
~GNUArchiveReader()40 GNUArchiveReader::~GNUArchiveReader()
41 {
42 }
43
44 /// isMyFormat
isMyFormat(Input & pInput,bool & pContinue) const45 bool GNUArchiveReader::isMyFormat(Input& pInput, bool &pContinue) const
46 {
47 assert(pInput.hasMemArea());
48 if (pInput.memArea()->size() < Archive::MAGIC_LEN)
49 return false;
50
51 llvm::StringRef region =
52 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
53 const char* str = region.begin();
54
55 bool result = false;
56 assert(NULL != str);
57 pContinue = true;
58 if (isArchive(str) || isThinArchive(str))
59 result = true;
60
61 return result;
62 }
63
64 /// isArchive
isArchive(const char * pStr) const65 bool GNUArchiveReader::isArchive(const char* pStr) const
66 {
67 return (0 == memcmp(pStr, Archive::MAGIC, Archive::MAGIC_LEN));
68 }
69
70 /// isThinArchive
isThinArchive(const char * pStr) const71 bool GNUArchiveReader::isThinArchive(const char* pStr) const
72 {
73 return (0 == memcmp(pStr, Archive::THIN_MAGIC, Archive::MAGIC_LEN));
74 }
75
76 /// isThinArchive
isThinArchive(Input & pInput) const77 bool GNUArchiveReader::isThinArchive(Input& pInput) const
78 {
79 assert(pInput.hasMemArea());
80 llvm::StringRef region =
81 pInput.memArea()->request(pInput.fileOffset(), Archive::MAGIC_LEN);
82 const char* str = region.begin();
83
84 bool result = false;
85 assert(NULL != str);
86 if (isThinArchive(str))
87 result = true;
88
89 return result;
90 }
91
readArchive(const LinkerConfig & pConfig,Archive & pArchive)92 bool GNUArchiveReader::readArchive(const LinkerConfig& pConfig,
93 Archive& pArchive)
94 {
95 // bypass the empty archive
96 if (Archive::MAGIC_LEN == pArchive.getARFile().memArea()->size())
97 return true;
98
99 if (pArchive.getARFile().attribute()->isWholeArchive())
100 return includeAllMembers(pConfig, pArchive);
101
102 // if this is the first time read this archive, setup symtab and strtab
103 if (pArchive.getSymbolTable().empty()) {
104 // read the symtab of the archive
105 readSymbolTable(pArchive);
106
107 // read the strtab of the archive
108 readStringTable(pArchive);
109
110 // add root archive to ArchiveMemberMap
111 pArchive.addArchiveMember(pArchive.getARFile().name(),
112 pArchive.inputs().root(),
113 &InputTree::Downward);
114 }
115
116 // include the needed members in the archive and build up the input tree
117 bool willSymResolved;
118 do {
119 willSymResolved = false;
120 for (size_t idx = 0; idx < pArchive.numOfSymbols(); ++idx) {
121 // bypass if we already decided to include this symbol or not
122 if (Archive::Symbol::Unknown != pArchive.getSymbolStatus(idx))
123 continue;
124
125 // bypass if another symbol with the same object file offset is included
126 if (pArchive.hasObjectMember(pArchive.getObjFileOffset(idx))) {
127 pArchive.setSymbolStatus(idx, Archive::Symbol::Include);
128 continue;
129 }
130
131 // check if we should include this defined symbol
132 Archive::Symbol::Status status =
133 shouldIncludeSymbol(pArchive.getSymbolName(idx));
134 if (Archive::Symbol::Unknown != status)
135 pArchive.setSymbolStatus(idx, status);
136
137 if (Archive::Symbol::Include == status) {
138 // include the object member from the given offset
139 includeMember(pConfig, pArchive, pArchive.getObjFileOffset(idx));
140 willSymResolved = true;
141 } // end of if
142 } // end of for
143 } while (willSymResolved);
144
145 return true;
146 }
147
148 /// readMemberHeader - read the header of a member in a archive file and then
149 /// return the corresponding archive member (it may be an input object or
150 /// another archive)
151 /// @param pArchiveRoot - the archive root that holds the strtab (extended
152 /// name table)
153 /// @param pArchiveFile - the archive that contains the needed object
154 /// @param pFileOffset - file offset of the member header in the archive
155 /// @param pNestedOffset - used when we find a nested archive
156 /// @param pMemberSize - the file size of this member
readMemberHeader(Archive & pArchiveRoot,Input & pArchiveFile,uint32_t pFileOffset,uint32_t & pNestedOffset,size_t & pMemberSize)157 Input* GNUArchiveReader::readMemberHeader(Archive& pArchiveRoot,
158 Input& pArchiveFile,
159 uint32_t pFileOffset,
160 uint32_t& pNestedOffset,
161 size_t& pMemberSize)
162 {
163 assert(pArchiveFile.hasMemArea());
164
165 llvm::StringRef header_region =
166 pArchiveFile.memArea()->request((pArchiveFile.fileOffset() + pFileOffset),
167 sizeof(Archive::MemberHeader));
168 const Archive::MemberHeader* header =
169 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
170
171 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
172
173 pMemberSize = atoi(header->size);
174
175 // parse the member name and nested offset if any
176 std::string member_name;
177 llvm::StringRef name_field(header->name, sizeof(header->name));
178 if ('/' != header->name[0]) {
179 // this is an object file in an archive
180 size_t pos = name_field.find_first_of('/');
181 member_name.assign(name_field.substr(0, pos).str());
182 }
183 else {
184 // this is an object/archive file in a thin archive
185 size_t begin = 1;
186 size_t end = name_field.find_first_of(" :");
187 uint32_t name_offset = 0;
188 // parse the name offset
189 name_field.substr(begin, end - begin).getAsInteger(10, name_offset);
190
191 if (':' == name_field[end]) {
192 // there is a nested offset
193 begin = end + 1;
194 end = name_field.find_first_of(' ', begin);
195 name_field.substr(begin, end - begin).getAsInteger(10, pNestedOffset);
196 }
197
198 // get the member name from the extended name table
199 assert(pArchiveRoot.hasStrTable());
200 begin = name_offset;
201 end = pArchiveRoot.getStrTable().find_first_of('\n', begin);
202 member_name.assign(pArchiveRoot.getStrTable().substr(begin, end - begin -1));
203 }
204
205 Input* member = NULL;
206 bool isThinAR = isThinArchive(pArchiveFile);
207 if (!isThinAR) {
208 // this is an object file in an archive
209 member = pArchiveRoot.getMemberFile(pArchiveFile,
210 isThinAR,
211 member_name,
212 pArchiveFile.path(),
213 (pFileOffset +
214 sizeof(Archive::MemberHeader)));
215 }
216 else {
217 // this is a member in a thin archive
218 // try to find if this is a archive already in the map first
219 Archive::ArchiveMember* ar_member =
220 pArchiveRoot.getArchiveMember(member_name);
221 if (NULL != ar_member) {
222 return ar_member->file;
223 }
224
225 // get nested file path, the nested file's member name is the relative
226 // path to the archive containing it.
227 sys::fs::Path input_path(pArchiveFile.path().parent_path());
228 if (!input_path.empty())
229 input_path.append(member_name);
230 else
231 input_path.assign(member_name);
232
233 member = pArchiveRoot.getMemberFile(pArchiveFile,
234 isThinAR,
235 member_name,
236 input_path);
237 }
238
239 return member;
240 }
241
242 template <size_t SIZE>
readSymbolTableEntries(Archive & pArchive,llvm::StringRef pMemRegion)243 static void readSymbolTableEntries(Archive& pArchive, llvm::StringRef pMemRegion)
244 {
245 typedef typename SizeTraits<SIZE>::Offset Offset;
246
247 const Offset* data = reinterpret_cast<const Offset*>(pMemRegion.begin());
248
249 // read the number of symbols
250 Offset number = 0;
251 if (llvm::sys::IsLittleEndianHost)
252 number = mcld::bswap<SIZE>(*data);
253 else
254 number = *data;
255
256 // set up the pointers for file offset and name offset
257 ++data;
258 const char* name = reinterpret_cast<const char*>(data + number);
259
260 // add the archive symbols
261 for (Offset i = 0; i < number; ++i) {
262 if (llvm::sys::IsLittleEndianHost)
263 pArchive.addSymbol(name, mcld::bswap<SIZE>(*data));
264 else
265 pArchive.addSymbol(name, *data);
266 name += strlen(name) + 1;
267 ++data;
268 }
269 }
270
271 /// readSymbolTable - read the archive symbol map (armap)
readSymbolTable(Archive & pArchive)272 bool GNUArchiveReader::readSymbolTable(Archive& pArchive)
273 {
274 assert(pArchive.getARFile().hasMemArea());
275
276 llvm::StringRef header_region =
277 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
278 Archive::MAGIC_LEN),
279 sizeof(Archive::MemberHeader));
280 const Archive::MemberHeader* header =
281 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
282 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
283
284 int symtab_size = atoi(header->size);
285 pArchive.setSymTabSize(symtab_size);
286
287 if (!pArchive.getARFile().attribute()->isWholeArchive()) {
288 llvm::StringRef symtab_region = pArchive.getARFile().memArea()->request(
289 (pArchive.getARFile().fileOffset() +
290 Archive::MAGIC_LEN +
291 sizeof(Archive::MemberHeader)),
292 symtab_size);
293
294 if (0 == strncmp(header->name, Archive::SVR4_SYMTAB_NAME,
295 strlen(Archive::SVR4_SYMTAB_NAME)))
296 readSymbolTableEntries<32>(pArchive, symtab_region);
297 else if (0 == strncmp(header->name, Archive::IRIX6_SYMTAB_NAME,
298 strlen(Archive::IRIX6_SYMTAB_NAME)))
299 readSymbolTableEntries<64>(pArchive, symtab_region);
300 else
301 unreachable(diag::err_unsupported_archive);
302
303 }
304 return true;
305 }
306
307 /// readStringTable - read the strtab for long file name of the archive
readStringTable(Archive & pArchive)308 bool GNUArchiveReader::readStringTable(Archive& pArchive)
309 {
310 size_t offset = Archive::MAGIC_LEN +
311 sizeof(Archive::MemberHeader) +
312 pArchive.getSymTabSize();
313
314 if (0x0 != (offset & 1))
315 ++offset;
316
317 assert(pArchive.getARFile().hasMemArea());
318
319 llvm::StringRef header_region =
320 pArchive.getARFile().memArea()->request((pArchive.getARFile().fileOffset() +
321 offset),
322 sizeof(Archive::MemberHeader));
323 const Archive::MemberHeader* header =
324 reinterpret_cast<const Archive::MemberHeader*>(header_region.begin());
325
326 assert(0 == memcmp(header->fmag, Archive::MEMBER_MAGIC, sizeof(header->fmag)));
327
328 if (0 == memcmp(header->name, Archive::STRTAB_NAME, sizeof(header->name))) {
329 // read the extended name table
330 int strtab_size = atoi(header->size);
331 llvm::StringRef strtab_region =
332 pArchive.getARFile().memArea()->request(
333 (pArchive.getARFile().fileOffset() +
334 offset + sizeof(Archive::MemberHeader)),
335 strtab_size);
336 const char* strtab = strtab_region.begin();
337 pArchive.getStrTable().assign(strtab, strtab_size);
338 }
339 return true;
340 }
341
342 /// shouldIncludeStatus - given a sym name from armap and check if including
343 /// the corresponding archive member, and then return the decision
344 enum Archive::Symbol::Status
shouldIncludeSymbol(const llvm::StringRef & pSymName) const345 GNUArchiveReader::shouldIncludeSymbol(const llvm::StringRef& pSymName) const
346 {
347 // TODO: handle symbol version issue and user defined symbols
348 const ResolveInfo* info = m_Module.getNamePool().findInfo(pSymName);
349 if (NULL != info) {
350 if (!info->isUndef())
351 return Archive::Symbol::Exclude;
352 if (info->isWeak())
353 return Archive::Symbol::Unknown;
354 return Archive::Symbol::Include;
355 }
356 return Archive::Symbol::Unknown;
357 }
358
359 /// includeMember - include the object member in the given file offset, and
360 /// return the size of the object
361 /// @param pConfig - LinkerConfig
362 /// @param pArchiveRoot - the archive root
363 /// @param pFileOffset - file offset of the member header in the archive
includeMember(const LinkerConfig & pConfig,Archive & pArchive,uint32_t pFileOffset)364 size_t GNUArchiveReader::includeMember(const LinkerConfig& pConfig,
365 Archive& pArchive,
366 uint32_t pFileOffset)
367 {
368 Input* cur_archive = &(pArchive.getARFile());
369 Input* member = NULL;
370 uint32_t file_offset = pFileOffset;
371 size_t size = 0;
372 do {
373 uint32_t nested_offset = 0;
374 // use the file offset in current archive to find out the member we
375 // want to include
376 member = readMemberHeader(pArchive,
377 *cur_archive,
378 file_offset,
379 nested_offset,
380 size);
381 assert(member != NULL);
382 // bypass if we get an archive that is already in the map
383 if (Input::Archive == member->type()) {
384 cur_archive = member;
385 file_offset = nested_offset;
386 continue;
387 }
388
389 // insert a node into the subtree of current archive.
390 Archive::ArchiveMember* parent =
391 pArchive.getArchiveMember(cur_archive->name());
392
393 assert(NULL != parent);
394 pArchive.inputs().insert(parent->lastPos, *(parent->move), *member);
395
396 // move the iterator to new created node, and also adjust the
397 // direction to Afterward for next insertion in this subtree
398 parent->move->move(parent->lastPos);
399 parent->move = &InputTree::Afterward;
400 bool doContinue = false;
401
402 if (m_ELFObjectReader.isMyFormat(*member, doContinue)) {
403 member->setType(Input::Object);
404 // Set this object as no export if the archive is in the exclude libs.
405 if (pArchive.getARFile().noExport()) {
406 member->setNoExport();
407 }
408 pArchive.addObjectMember(pFileOffset, parent->lastPos);
409 m_ELFObjectReader.readHeader(*member);
410 m_ELFObjectReader.readSections(*member);
411 m_ELFObjectReader.readSymbols(*member);
412 m_Module.getObjectList().push_back(member);
413 }
414 else if (doContinue && isMyFormat(*member, doContinue)) {
415 member->setType(Input::Archive);
416 // when adding a new archive node, set the iterator to archive
417 // itself, and set the direction to Downward
418 pArchive.addArchiveMember(member->name(),
419 parent->lastPos,
420 &InputTree::Downward);
421 cur_archive = member;
422 file_offset = nested_offset;
423 }
424 else {
425 warning(diag::warn_unrecognized_input_file) << member->path()
426 << pConfig.targets().triple().str();
427 }
428 } while (Input::Object != member->type());
429 return size;
430 }
431
432 /// includeAllMembers - include all object members. This is called if
433 /// --whole-archive is the attribute for this archive file.
includeAllMembers(const LinkerConfig & pConfig,Archive & pArchive)434 bool GNUArchiveReader::includeAllMembers(const LinkerConfig& pConfig,
435 Archive& pArchive)
436 {
437 // read the symtab of the archive
438 readSymbolTable(pArchive);
439
440 // read the strtab of the archive
441 readStringTable(pArchive);
442
443 // add root archive to ArchiveMemberMap
444 pArchive.addArchiveMember(pArchive.getARFile().name(),
445 pArchive.inputs().root(),
446 &InputTree::Downward);
447
448 bool isThinAR = isThinArchive(pArchive.getARFile());
449 uint32_t begin_offset = pArchive.getARFile().fileOffset() +
450 Archive::MAGIC_LEN +
451 sizeof(Archive::MemberHeader) +
452 pArchive.getSymTabSize();
453 if (pArchive.hasStrTable()) {
454 if (0x0 != (begin_offset & 1))
455 ++begin_offset;
456 begin_offset += sizeof(Archive::MemberHeader) +
457 pArchive.getStrTable().size();
458 }
459 uint32_t end_offset = pArchive.getARFile().memArea()->size();
460 for (uint32_t offset = begin_offset;
461 offset < end_offset;
462 offset += sizeof(Archive::MemberHeader)) {
463
464 size_t size = includeMember(pConfig, pArchive, offset);
465
466 if (!isThinAR) {
467 offset += size;
468 }
469
470 if (0x0 != (offset & 1))
471 ++offset;
472 }
473 return true;
474 }
475