1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file implements the Archive class together with its member-header,
// child and symbol helpers.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/Object/Archive.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Object/Binary.h"
20 #include "llvm/Object/Error.h"
21 #include "llvm/Support/Chrono.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/ErrorOr.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstddef>
32 #include <cstdint>
33 #include <cstring>
34 #include <memory>
35 #include <string>
36 #include <system_error>
37
38 using namespace llvm;
39 using namespace object;
40 using namespace llvm::support::endian;
41
// Signatures expected at the very start of the buffer: the first marks a
// regular archive, the second a thin archive.
static constexpr const char *Magic = "!<arch>\n";
static constexpr const char *ThinMagic = "!<thin>\n";
44
anchor()45 void Archive::anchor() {}
46
47 static Error
malformedError(Twine Msg)48 malformedError(Twine Msg) {
49 std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
50 return make_error<GenericBinaryError>(std::move(StringMsg),
51 object_error::parse_failed);
52 }
53
ArchiveMemberHeader(const Archive * Parent,const char * RawHeaderPtr,uint64_t Size,Error * Err)54 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
55 const char *RawHeaderPtr,
56 uint64_t Size, Error *Err)
57 : Parent(Parent),
58 ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
59 if (RawHeaderPtr == nullptr)
60 return;
61 ErrorAsOutParameter ErrAsOutParam(Err);
62
63 if (Size < sizeof(ArMemHdrType)) {
64 if (Err) {
65 std::string Msg("remaining size of archive too small for next archive "
66 "member header ");
67 Expected<StringRef> NameOrErr = getName(Size);
68 if (!NameOrErr) {
69 consumeError(NameOrErr.takeError());
70 uint64_t Offset = RawHeaderPtr - Parent->getData().data();
71 *Err = malformedError(Msg + "at offset " + Twine(Offset));
72 } else
73 *Err = malformedError(Msg + "for " + NameOrErr.get());
74 }
75 return;
76 }
77 if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
78 if (Err) {
79 std::string Buf;
80 raw_string_ostream OS(Buf);
81 OS.write_escaped(StringRef(ArMemHdr->Terminator,
82 sizeof(ArMemHdr->Terminator)));
83 OS.flush();
84 std::string Msg("terminator characters in archive member \"" + Buf +
85 "\" not the correct \"`\\n\" values for the archive "
86 "member header ");
87 Expected<StringRef> NameOrErr = getName(Size);
88 if (!NameOrErr) {
89 consumeError(NameOrErr.takeError());
90 uint64_t Offset = RawHeaderPtr - Parent->getData().data();
91 *Err = malformedError(Msg + "at offset " + Twine(Offset));
92 } else
93 *Err = malformedError(Msg + "for " + NameOrErr.get());
94 }
95 return;
96 }
97 }
98
99 // This gets the raw name from the ArMemHdr->Name field and checks that it is
100 // valid for the kind of archive. If it is not valid it returns an Error.
getRawName() const101 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
102 char EndCond;
103 auto Kind = Parent->kind();
104 if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
105 if (ArMemHdr->Name[0] == ' ') {
106 uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
107 Parent->getData().data();
108 return malformedError("name contains a leading space for archive member "
109 "header at offset " + Twine(Offset));
110 }
111 EndCond = ' ';
112 }
113 else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
114 EndCond = ' ';
115 else
116 EndCond = '/';
117 StringRef::size_type end =
118 StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
119 if (end == StringRef::npos)
120 end = sizeof(ArMemHdr->Name);
121 assert(end <= sizeof(ArMemHdr->Name) && end > 0);
122 // Don't include the EndCond if there is one.
123 return StringRef(ArMemHdr->Name, end);
124 }
125
126 // This gets the name looking up long names. Size is the size of the archive
127 // member including the header, so the size of any name following the header
128 // is checked to make sure it does not overflow.
getName(uint64_t Size) const129 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
130
131 // This can be called from the ArchiveMemberHeader constructor when the
132 // archive header is truncated to produce an error message with the name.
133 // Make sure the name field is not truncated.
134 if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
135 uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
136 Parent->getData().data();
137 return malformedError("archive header truncated before the name field "
138 "for archive member header at offset " +
139 Twine(ArchiveOffset));
140 }
141
142 // The raw name itself can be invalid.
143 Expected<StringRef> NameOrErr = getRawName();
144 if (!NameOrErr)
145 return NameOrErr.takeError();
146 StringRef Name = NameOrErr.get();
147
148 // Check if it's a special name.
149 if (Name[0] == '/') {
150 if (Name.size() == 1) // Linker member.
151 return Name;
152 if (Name.size() == 2 && Name[1] == '/') // String table.
153 return Name;
154 // It's a long name.
155 // Get the string table offset.
156 std::size_t StringOffset;
157 if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
158 std::string Buf;
159 raw_string_ostream OS(Buf);
160 OS.write_escaped(Name.substr(1).rtrim(' '));
161 OS.flush();
162 uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
163 Parent->getData().data();
164 return malformedError("long name offset characters after the '/' are "
165 "not all decimal numbers: '" + Buf + "' for "
166 "archive member header at offset " +
167 Twine(ArchiveOffset));
168 }
169
170 // Verify it.
171 if (StringOffset >= Parent->getStringTable().size()) {
172 uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
173 Parent->getData().data();
174 return malformedError("long name offset " + Twine(StringOffset) + " past "
175 "the end of the string table for archive member "
176 "header at offset " + Twine(ArchiveOffset));
177 }
178
179 // GNU long file names end with a "/\n".
180 if (Parent->kind() == Archive::K_GNU ||
181 Parent->kind() == Archive::K_GNU64) {
182 size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
183 if (End == StringRef::npos || End < 1 ||
184 Parent->getStringTable()[End - 1] != '/') {
185 return malformedError("string table at long name offset " +
186 Twine(StringOffset) + "not terminated");
187 }
188 return Parent->getStringTable().slice(StringOffset, End - 1);
189 }
190 return Parent->getStringTable().begin() + StringOffset;
191 }
192
193 if (Name.startswith("#1/")) {
194 uint64_t NameLength;
195 if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
196 std::string Buf;
197 raw_string_ostream OS(Buf);
198 OS.write_escaped(Name.substr(3).rtrim(' '));
199 OS.flush();
200 uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
201 Parent->getData().data();
202 return malformedError("long name length characters after the #1/ are "
203 "not all decimal numbers: '" + Buf + "' for "
204 "archive member header at offset " +
205 Twine(ArchiveOffset));
206 }
207 if (getSizeOf() + NameLength > Size) {
208 uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
209 Parent->getData().data();
210 return malformedError("long name length: " + Twine(NameLength) +
211 " extends past the end of the member or archive "
212 "for archive member header at offset " +
213 Twine(ArchiveOffset));
214 }
215 return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
216 NameLength).rtrim('\0');
217 }
218
219 // It is not a long name so trim the blanks at the end of the name.
220 if (Name[Name.size() - 1] != '/')
221 return Name.rtrim(' ');
222
223 // It's a simple name.
224 return Name.drop_back(1);
225 }
226
getSize() const227 Expected<uint32_t> ArchiveMemberHeader::getSize() const {
228 uint32_t Ret;
229 if (StringRef(ArMemHdr->Size,
230 sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
231 std::string Buf;
232 raw_string_ostream OS(Buf);
233 OS.write_escaped(StringRef(ArMemHdr->Size,
234 sizeof(ArMemHdr->Size)).rtrim(" "));
235 OS.flush();
236 uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
237 Parent->getData().data();
238 return malformedError("characters in size field in archive header are not "
239 "all decimal numbers: '" + Buf + "' for archive "
240 "member header at offset " + Twine(Offset));
241 }
242 return Ret;
243 }
244
getAccessMode() const245 Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
246 unsigned Ret;
247 if (StringRef(ArMemHdr->AccessMode,
248 sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
249 std::string Buf;
250 raw_string_ostream OS(Buf);
251 OS.write_escaped(StringRef(ArMemHdr->AccessMode,
252 sizeof(ArMemHdr->AccessMode)).rtrim(" "));
253 OS.flush();
254 uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
255 Parent->getData().data();
256 return malformedError("characters in AccessMode field in archive header "
257 "are not all decimal numbers: '" + Buf + "' for the "
258 "archive member header at offset " + Twine(Offset));
259 }
260 return static_cast<sys::fs::perms>(Ret);
261 }
262
263 Expected<sys::TimePoint<std::chrono::seconds>>
getLastModified() const264 ArchiveMemberHeader::getLastModified() const {
265 unsigned Seconds;
266 if (StringRef(ArMemHdr->LastModified,
267 sizeof(ArMemHdr->LastModified)).rtrim(' ')
268 .getAsInteger(10, Seconds)) {
269 std::string Buf;
270 raw_string_ostream OS(Buf);
271 OS.write_escaped(StringRef(ArMemHdr->LastModified,
272 sizeof(ArMemHdr->LastModified)).rtrim(" "));
273 OS.flush();
274 uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
275 Parent->getData().data();
276 return malformedError("characters in LastModified field in archive header "
277 "are not all decimal numbers: '" + Buf + "' for the "
278 "archive member header at offset " + Twine(Offset));
279 }
280
281 return sys::toTimePoint(Seconds);
282 }
283
getUID() const284 Expected<unsigned> ArchiveMemberHeader::getUID() const {
285 unsigned Ret;
286 StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
287 if (User.empty())
288 return 0;
289 if (User.getAsInteger(10, Ret)) {
290 std::string Buf;
291 raw_string_ostream OS(Buf);
292 OS.write_escaped(User);
293 OS.flush();
294 uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
295 Parent->getData().data();
296 return malformedError("characters in UID field in archive header "
297 "are not all decimal numbers: '" + Buf + "' for the "
298 "archive member header at offset " + Twine(Offset));
299 }
300 return Ret;
301 }
302
getGID() const303 Expected<unsigned> ArchiveMemberHeader::getGID() const {
304 unsigned Ret;
305 StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
306 if (Group.empty())
307 return 0;
308 if (Group.getAsInteger(10, Ret)) {
309 std::string Buf;
310 raw_string_ostream OS(Buf);
311 OS.write_escaped(Group);
312 OS.flush();
313 uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
314 Parent->getData().data();
315 return malformedError("characters in GID field in archive header "
316 "are not all decimal numbers: '" + Buf + "' for the "
317 "archive member header at offset " + Twine(Offset));
318 }
319 return Ret;
320 }
321
// Builds a Child from an already delimited member: Data and StartOfFile are
// stored as given. The header is constructed over Data without an error sink
// (its Err argument is null).
Archive::Child::Child(const Archive *Parent, StringRef Data,
                      uint16_t StartOfFile)
    : Parent(Parent),
      Header(Parent, Data.data(), Data.size(), /*Err=*/nullptr),
      Data(Data),
      StartOfFile(StartOfFile) {}
327
Child(const Archive * Parent,const char * Start,Error * Err)328 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
329 : Parent(Parent),
330 Header(Parent, Start,
331 Parent
332 ? Parent->getData().size() - (Start - Parent->getData().data())
333 : 0, Err) {
334 if (!Start)
335 return;
336
337 // If we are pointed to real data, Start is not a nullptr, then there must be
338 // a non-null Err pointer available to report malformed data on. Only in
339 // the case sentinel value is being constructed is Err is permitted to be a
340 // nullptr.
341 assert(Err && "Err can't be nullptr if Start is not a nullptr");
342
343 ErrorAsOutParameter ErrAsOutParam(Err);
344
345 // If there was an error in the construction of the Header
346 // then just return with the error now set.
347 if (*Err)
348 return;
349
350 uint64_t Size = Header.getSizeOf();
351 Data = StringRef(Start, Size);
352 Expected<bool> isThinOrErr = isThinMember();
353 if (!isThinOrErr) {
354 *Err = isThinOrErr.takeError();
355 return;
356 }
357 bool isThin = isThinOrErr.get();
358 if (!isThin) {
359 Expected<uint64_t> MemberSize = getRawSize();
360 if (!MemberSize) {
361 *Err = MemberSize.takeError();
362 return;
363 }
364 Size += MemberSize.get();
365 Data = StringRef(Start, Size);
366 }
367
368 // Setup StartOfFile and PaddingBytes.
369 StartOfFile = Header.getSizeOf();
370 // Don't include attached name.
371 Expected<StringRef> NameOrErr = getRawName();
372 if (!NameOrErr){
373 *Err = NameOrErr.takeError();
374 return;
375 }
376 StringRef Name = NameOrErr.get();
377 if (Name.startswith("#1/")) {
378 uint64_t NameSize;
379 if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
380 std::string Buf;
381 raw_string_ostream OS(Buf);
382 OS.write_escaped(Name.substr(3).rtrim(' '));
383 OS.flush();
384 uint64_t Offset = Start - Parent->getData().data();
385 *Err = malformedError("long name length characters after the #1/ are "
386 "not all decimal numbers: '" + Buf + "' for "
387 "archive member header at offset " +
388 Twine(Offset));
389 return;
390 }
391 StartOfFile += NameSize;
392 }
393 }
394
getSize() const395 Expected<uint64_t> Archive::Child::getSize() const {
396 if (Parent->IsThin) {
397 Expected<uint32_t> Size = Header.getSize();
398 if (!Size)
399 return Size.takeError();
400 return Size.get();
401 }
402 return Data.size() - StartOfFile;
403 }
404
getRawSize() const405 Expected<uint64_t> Archive::Child::getRawSize() const {
406 return Header.getSize();
407 }
408
isThinMember() const409 Expected<bool> Archive::Child::isThinMember() const {
410 Expected<StringRef> NameOrErr = Header.getRawName();
411 if (!NameOrErr)
412 return NameOrErr.takeError();
413 StringRef Name = NameOrErr.get();
414 return Parent->IsThin && Name != "/" && Name != "//";
415 }
416
getFullName() const417 Expected<std::string> Archive::Child::getFullName() const {
418 Expected<bool> isThin = isThinMember();
419 if (!isThin)
420 return isThin.takeError();
421 assert(isThin.get());
422 Expected<StringRef> NameOrErr = getName();
423 if (!NameOrErr)
424 return NameOrErr.takeError();
425 StringRef Name = *NameOrErr;
426 if (sys::path::is_absolute(Name))
427 return Name;
428
429 SmallString<128> FullName = sys::path::parent_path(
430 Parent->getMemoryBufferRef().getBufferIdentifier());
431 sys::path::append(FullName, Name);
432 return StringRef(FullName);
433 }
434
getBuffer() const435 Expected<StringRef> Archive::Child::getBuffer() const {
436 Expected<bool> isThinOrErr = isThinMember();
437 if (!isThinOrErr)
438 return isThinOrErr.takeError();
439 bool isThin = isThinOrErr.get();
440 if (!isThin) {
441 Expected<uint32_t> Size = getSize();
442 if (!Size)
443 return Size.takeError();
444 return StringRef(Data.data() + StartOfFile, Size.get());
445 }
446 Expected<std::string> FullNameOrErr = getFullName();
447 if (!FullNameOrErr)
448 return FullNameOrErr.takeError();
449 const std::string &FullName = *FullNameOrErr;
450 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
451 if (std::error_code EC = Buf.getError())
452 return errorCodeToError(EC);
453 Parent->ThinBuffers.push_back(std::move(*Buf));
454 return Parent->ThinBuffers.back()->getBuffer();
455 }
456
getNext() const457 Expected<Archive::Child> Archive::Child::getNext() const {
458 size_t SpaceToSkip = Data.size();
459 // If it's odd, add 1 to make it even.
460 if (SpaceToSkip & 1)
461 ++SpaceToSkip;
462
463 const char *NextLoc = Data.data() + SpaceToSkip;
464
465 // Check to see if this is at the end of the archive.
466 if (NextLoc == Parent->Data.getBufferEnd())
467 return Child(nullptr, nullptr, nullptr);
468
469 // Check to see if this is past the end of the archive.
470 if (NextLoc > Parent->Data.getBufferEnd()) {
471 std::string Msg("offset to next archive member past the end of the archive "
472 "after member ");
473 Expected<StringRef> NameOrErr = getName();
474 if (!NameOrErr) {
475 consumeError(NameOrErr.takeError());
476 uint64_t Offset = Data.data() - Parent->getData().data();
477 return malformedError(Msg + "at offset " + Twine(Offset));
478 } else
479 return malformedError(Msg + NameOrErr.get());
480 }
481
482 Error Err = Error::success();
483 Child Ret(Parent, NextLoc, &Err);
484 if (Err)
485 return std::move(Err);
486 return Ret;
487 }
488
getChildOffset() const489 uint64_t Archive::Child::getChildOffset() const {
490 const char *a = Parent->Data.getBuffer().data();
491 const char *c = Data.data();
492 uint64_t offset = c - a;
493 return offset;
494 }
495
getName() const496 Expected<StringRef> Archive::Child::getName() const {
497 Expected<uint64_t> RawSizeOrErr = getRawSize();
498 if (!RawSizeOrErr)
499 return RawSizeOrErr.takeError();
500 uint64_t RawSize = RawSizeOrErr.get();
501 Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
502 if (!NameOrErr)
503 return NameOrErr.takeError();
504 StringRef Name = NameOrErr.get();
505 return Name;
506 }
507
getMemoryBufferRef() const508 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
509 Expected<StringRef> NameOrErr = getName();
510 if (!NameOrErr)
511 return NameOrErr.takeError();
512 StringRef Name = NameOrErr.get();
513 Expected<StringRef> Buf = getBuffer();
514 if (!Buf)
515 return Buf.takeError();
516 return MemoryBufferRef(*Buf, Name);
517 }
518
519 Expected<std::unique_ptr<Binary>>
getAsBinary(LLVMContext * Context) const520 Archive::Child::getAsBinary(LLVMContext *Context) const {
521 Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
522 if (!BuffOrErr)
523 return BuffOrErr.takeError();
524
525 auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
526 if (BinaryOrErr)
527 return std::move(*BinaryOrErr);
528 return BinaryOrErr.takeError();
529 }
530
create(MemoryBufferRef Source)531 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
532 Error Err = Error::success();
533 std::unique_ptr<Archive> Ret(new Archive(Source, Err));
534 if (Err)
535 return std::move(Err);
536 return std::move(Ret);
537 }
538
setFirstRegular(const Child & C)539 void Archive::setFirstRegular(const Child &C) {
540 FirstRegularData = C.Data;
541 FirstRegularStartOfFile = C.StartOfFile;
542 }
543
Archive(MemoryBufferRef Source,Error & Err)544 Archive::Archive(MemoryBufferRef Source, Error &Err)
545 : Binary(Binary::ID_Archive, Source) {
546 ErrorAsOutParameter ErrAsOutParam(&Err);
547 StringRef Buffer = Data.getBuffer();
548 // Check for sufficient magic.
549 if (Buffer.startswith(ThinMagic)) {
550 IsThin = true;
551 } else if (Buffer.startswith(Magic)) {
552 IsThin = false;
553 } else {
554 Err = make_error<GenericBinaryError>("File too small to be an archive",
555 object_error::invalid_file_type);
556 return;
557 }
558
559 // Make sure Format is initialized before any call to
560 // ArchiveMemberHeader::getName() is made. This could be a valid empty
561 // archive which is the same in all formats. So claiming it to be gnu to is
562 // fine if not totally correct before we look for a string table or table of
563 // contents.
564 Format = K_GNU;
565
566 // Get the special members.
567 child_iterator I = child_begin(Err, false);
568 if (Err)
569 return;
570 child_iterator E = child_end();
571
572 // See if this is a valid empty archive and if so return.
573 if (I == E) {
574 Err = Error::success();
575 return;
576 }
577 const Child *C = &*I;
578
579 auto Increment = [&]() {
580 ++I;
581 if (Err)
582 return true;
583 C = &*I;
584 return false;
585 };
586
587 Expected<StringRef> NameOrErr = C->getRawName();
588 if (!NameOrErr) {
589 Err = NameOrErr.takeError();
590 return;
591 }
592 StringRef Name = NameOrErr.get();
593
594 // Below is the pattern that is used to figure out the archive format
595 // GNU archive format
596 // First member : / (may exist, if it exists, points to the symbol table )
597 // Second member : // (may exist, if it exists, points to the string table)
598 // Note : The string table is used if the filename exceeds 15 characters
599 // BSD archive format
600 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
601 // There is no string table, if the filename exceeds 15 characters or has a
602 // embedded space, the filename has #1/<size>, The size represents the size
603 // of the filename that needs to be read after the archive header
604 // COFF archive format
605 // First member : /
606 // Second member : / (provides a directory of symbols)
607 // Third member : // (may exist, if it exists, contains the string table)
608 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
609 // even if the string table is empty. However, lib.exe does not in fact
610 // seem to create the third member if there's no member whose filename
611 // exceeds 15 characters. So the third member is optional.
612
613 if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
614 if (Name == "__.SYMDEF")
615 Format = K_BSD;
616 else // Name == "__.SYMDEF_64"
617 Format = K_DARWIN64;
618 // We know that the symbol table is not an external file, but we still must
619 // check any Expected<> return value.
620 Expected<StringRef> BufOrErr = C->getBuffer();
621 if (!BufOrErr) {
622 Err = BufOrErr.takeError();
623 return;
624 }
625 SymbolTable = BufOrErr.get();
626 if (Increment())
627 return;
628 setFirstRegular(*C);
629
630 Err = Error::success();
631 return;
632 }
633
634 if (Name.startswith("#1/")) {
635 Format = K_BSD;
636 // We know this is BSD, so getName will work since there is no string table.
637 Expected<StringRef> NameOrErr = C->getName();
638 if (!NameOrErr) {
639 Err = NameOrErr.takeError();
640 return;
641 }
642 Name = NameOrErr.get();
643 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
644 // We know that the symbol table is not an external file, but we still
645 // must check any Expected<> return value.
646 Expected<StringRef> BufOrErr = C->getBuffer();
647 if (!BufOrErr) {
648 Err = BufOrErr.takeError();
649 return;
650 }
651 SymbolTable = BufOrErr.get();
652 if (Increment())
653 return;
654 }
655 else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
656 Format = K_DARWIN64;
657 // We know that the symbol table is not an external file, but we still
658 // must check any Expected<> return value.
659 Expected<StringRef> BufOrErr = C->getBuffer();
660 if (!BufOrErr) {
661 Err = BufOrErr.takeError();
662 return;
663 }
664 SymbolTable = BufOrErr.get();
665 if (Increment())
666 return;
667 }
668 setFirstRegular(*C);
669 return;
670 }
671
672 // MIPS 64-bit ELF archives use a special format of a symbol table.
673 // This format is marked by `ar_name` field equals to "/SYM64/".
674 // For detailed description see page 96 in the following document:
675 // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
676
677 bool has64SymTable = false;
678 if (Name == "/" || Name == "/SYM64/") {
679 // We know that the symbol table is not an external file, but we still
680 // must check any Expected<> return value.
681 Expected<StringRef> BufOrErr = C->getBuffer();
682 if (!BufOrErr) {
683 Err = BufOrErr.takeError();
684 return;
685 }
686 SymbolTable = BufOrErr.get();
687 if (Name == "/SYM64/")
688 has64SymTable = true;
689
690 if (Increment())
691 return;
692 if (I == E) {
693 Err = Error::success();
694 return;
695 }
696 Expected<StringRef> NameOrErr = C->getRawName();
697 if (!NameOrErr) {
698 Err = NameOrErr.takeError();
699 return;
700 }
701 Name = NameOrErr.get();
702 }
703
704 if (Name == "//") {
705 Format = has64SymTable ? K_GNU64 : K_GNU;
706 // The string table is never an external member, but we still
707 // must check any Expected<> return value.
708 Expected<StringRef> BufOrErr = C->getBuffer();
709 if (!BufOrErr) {
710 Err = BufOrErr.takeError();
711 return;
712 }
713 StringTable = BufOrErr.get();
714 if (Increment())
715 return;
716 setFirstRegular(*C);
717 Err = Error::success();
718 return;
719 }
720
721 if (Name[0] != '/') {
722 Format = has64SymTable ? K_GNU64 : K_GNU;
723 setFirstRegular(*C);
724 Err = Error::success();
725 return;
726 }
727
728 if (Name != "/") {
729 Err = errorCodeToError(object_error::parse_failed);
730 return;
731 }
732
733 Format = K_COFF;
734 // We know that the symbol table is not an external file, but we still
735 // must check any Expected<> return value.
736 Expected<StringRef> BufOrErr = C->getBuffer();
737 if (!BufOrErr) {
738 Err = BufOrErr.takeError();
739 return;
740 }
741 SymbolTable = BufOrErr.get();
742
743 if (Increment())
744 return;
745
746 if (I == E) {
747 setFirstRegular(*C);
748 Err = Error::success();
749 return;
750 }
751
752 NameOrErr = C->getRawName();
753 if (!NameOrErr) {
754 Err = NameOrErr.takeError();
755 return;
756 }
757 Name = NameOrErr.get();
758
759 if (Name == "//") {
760 // The string table is never an external member, but we still
761 // must check any Expected<> return value.
762 Expected<StringRef> BufOrErr = C->getBuffer();
763 if (!BufOrErr) {
764 Err = BufOrErr.takeError();
765 return;
766 }
767 StringTable = BufOrErr.get();
768 if (Increment())
769 return;
770 }
771
772 setFirstRegular(*C);
773 Err = Error::success();
774 }
775
child_begin(Error & Err,bool SkipInternal) const776 Archive::child_iterator Archive::child_begin(Error &Err,
777 bool SkipInternal) const {
778 if (isEmpty())
779 return child_end();
780
781 if (SkipInternal)
782 return child_iterator(Child(this, FirstRegularData,
783 FirstRegularStartOfFile),
784 &Err);
785
786 const char *Loc = Data.getBufferStart() + strlen(Magic);
787 Child C(this, Loc, &Err);
788 if (Err)
789 return child_end();
790 return child_iterator(C, &Err);
791 }
792
child_end() const793 Archive::child_iterator Archive::child_end() const {
794 return child_iterator(Child(nullptr, nullptr, nullptr), nullptr);
795 }
796
getName() const797 StringRef Archive::Symbol::getName() const {
798 return Parent->getSymbolTable().begin() + StringIndex;
799 }
800
getMember() const801 Expected<Archive::Child> Archive::Symbol::getMember() const {
802 const char *Buf = Parent->getSymbolTable().begin();
803 const char *Offsets = Buf;
804 if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
805 Offsets += sizeof(uint64_t);
806 else
807 Offsets += sizeof(uint32_t);
808 uint64_t Offset = 0;
809 if (Parent->kind() == K_GNU) {
810 Offset = read32be(Offsets + SymbolIndex * 4);
811 } else if (Parent->kind() == K_GNU64) {
812 Offset = read64be(Offsets + SymbolIndex * 8);
813 } else if (Parent->kind() == K_BSD) {
814 // The SymbolIndex is an index into the ranlib structs that start at
815 // Offsets (the first uint32_t is the number of bytes of the ranlib
816 // structs). The ranlib structs are a pair of uint32_t's the first
817 // being a string table offset and the second being the offset into
818 // the archive of the member that defines the symbol. Which is what
819 // is needed here.
820 Offset = read32le(Offsets + SymbolIndex * 8 + 4);
821 } else if (Parent->kind() == K_DARWIN64) {
822 // The SymbolIndex is an index into the ranlib_64 structs that start at
823 // Offsets (the first uint64_t is the number of bytes of the ranlib_64
824 // structs). The ranlib_64 structs are a pair of uint64_t's the first
825 // being a string table offset and the second being the offset into
826 // the archive of the member that defines the symbol. Which is what
827 // is needed here.
828 Offset = read64le(Offsets + SymbolIndex * 16 + 8);
829 } else {
830 // Skip offsets.
831 uint32_t MemberCount = read32le(Buf);
832 Buf += MemberCount * 4 + 4;
833
834 uint32_t SymbolCount = read32le(Buf);
835 if (SymbolIndex >= SymbolCount)
836 return errorCodeToError(object_error::parse_failed);
837
838 // Skip SymbolCount to get to the indices table.
839 const char *Indices = Buf + 4;
840
841 // Get the index of the offset in the file member offset table for this
842 // symbol.
843 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
844 // Subtract 1 since OffsetIndex is 1 based.
845 --OffsetIndex;
846
847 if (OffsetIndex >= MemberCount)
848 return errorCodeToError(object_error::parse_failed);
849
850 Offset = read32le(Offsets + OffsetIndex * 4);
851 }
852
853 const char *Loc = Parent->getData().begin() + Offset;
854 Error Err = Error::success();
855 Child C(Parent, Loc, &Err);
856 if (Err)
857 return std::move(Err);
858 return C;
859 }
860
getNext() const861 Archive::Symbol Archive::Symbol::getNext() const {
862 Symbol t(*this);
863 if (Parent->kind() == K_BSD) {
864 // t.StringIndex is an offset from the start of the __.SYMDEF or
865 // "__.SYMDEF SORTED" member into the string table for the ranlib
866 // struct indexed by t.SymbolIndex . To change t.StringIndex to the
867 // offset in the string table for t.SymbolIndex+1 we subtract the
868 // its offset from the start of the string table for t.SymbolIndex
869 // and add the offset of the string table for t.SymbolIndex+1.
870
871 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
872 // which is the number of bytes of ranlib structs that follow. The ranlib
873 // structs are a pair of uint32_t's the first being a string table offset
874 // and the second being the offset into the archive of the member that
875 // define the symbol. After that the next uint32_t is the byte count of
876 // the string table followed by the string table.
877 const char *Buf = Parent->getSymbolTable().begin();
878 uint32_t RanlibCount = 0;
879 RanlibCount = read32le(Buf) / 8;
880 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
881 // don't change the t.StringIndex as we don't want to reference a ranlib
882 // past RanlibCount.
883 if (t.SymbolIndex + 1 < RanlibCount) {
884 const char *Ranlibs = Buf + 4;
885 uint32_t CurRanStrx = 0;
886 uint32_t NextRanStrx = 0;
887 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
888 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
889 t.StringIndex -= CurRanStrx;
890 t.StringIndex += NextRanStrx;
891 }
892 } else {
893 // Go to one past next null.
894 t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
895 }
896 ++t.SymbolIndex;
897 return t;
898 }
899
symbol_begin() const900 Archive::symbol_iterator Archive::symbol_begin() const {
901 if (!hasSymbolTable())
902 return symbol_iterator(Symbol(this, 0, 0));
903
904 const char *buf = getSymbolTable().begin();
905 if (kind() == K_GNU) {
906 uint32_t symbol_count = 0;
907 symbol_count = read32be(buf);
908 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
909 } else if (kind() == K_GNU64) {
910 uint64_t symbol_count = read64be(buf);
911 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
912 } else if (kind() == K_BSD) {
913 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
914 // which is the number of bytes of ranlib structs that follow. The ranlib
915 // structs are a pair of uint32_t's the first being a string table offset
916 // and the second being the offset into the archive of the member that
917 // define the symbol. After that the next uint32_t is the byte count of
918 // the string table followed by the string table.
919 uint32_t ranlib_count = 0;
920 ranlib_count = read32le(buf) / 8;
921 const char *ranlibs = buf + 4;
922 uint32_t ran_strx = 0;
923 ran_strx = read32le(ranlibs);
924 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
925 // Skip the byte count of the string table.
926 buf += sizeof(uint32_t);
927 buf += ran_strx;
928 } else if (kind() == K_DARWIN64) {
929 // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
930 // which is the number of bytes of ranlib_64 structs that follow. The
931 // ranlib_64 structs are a pair of uint64_t's the first being a string
932 // table offset and the second being the offset into the archive of the
933 // member that define the symbol. After that the next uint64_t is the byte
934 // count of the string table followed by the string table.
935 uint64_t ranlib_count = 0;
936 ranlib_count = read64le(buf) / 16;
937 const char *ranlibs = buf + 8;
938 uint64_t ran_strx = 0;
939 ran_strx = read64le(ranlibs);
940 buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
941 // Skip the byte count of the string table.
942 buf += sizeof(uint64_t);
943 buf += ran_strx;
944 } else {
945 uint32_t member_count = 0;
946 uint32_t symbol_count = 0;
947 member_count = read32le(buf);
948 buf += 4 + (member_count * 4); // Skip offsets.
949 symbol_count = read32le(buf);
950 buf += 4 + (symbol_count * 2); // Skip indices.
951 }
952 uint32_t string_start_offset = buf - getSymbolTable().begin();
953 return symbol_iterator(Symbol(this, 0, string_start_offset));
954 }
955
symbol_end() const956 Archive::symbol_iterator Archive::symbol_end() const {
957 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
958 }
959
getNumberOfSymbols() const960 uint32_t Archive::getNumberOfSymbols() const {
961 if (!hasSymbolTable())
962 return 0;
963 const char *buf = getSymbolTable().begin();
964 if (kind() == K_GNU)
965 return read32be(buf);
966 if (kind() == K_GNU64)
967 return read64be(buf);
968 if (kind() == K_BSD)
969 return read32le(buf) / 8;
970 if (kind() == K_DARWIN64)
971 return read64le(buf) / 16;
972 uint32_t member_count = 0;
973 member_count = read32le(buf);
974 buf += 4 + (member_count * 4); // Skip offsets.
975 return read32le(buf);
976 }
977
findSym(StringRef name) const978 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
979 Archive::symbol_iterator bs = symbol_begin();
980 Archive::symbol_iterator es = symbol_end();
981
982 for (; bs != es; ++bs) {
983 StringRef SymName = bs->getName();
984 if (SymName == name) {
985 if (auto MemberOrErr = bs->getMember())
986 return Child(*MemberOrErr);
987 else
988 return MemberOrErr.takeError();
989 }
990 }
991 return Optional<Child>();
992 }
993
994 // Returns true if archive file contains no member file.
isEmpty() const995 bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
996
hasSymbolTable() const997 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
998