1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file defines the Archive and ArchiveMemberHeader classes.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Object/Archive.h"
15 #include "llvm/ADT/Optional.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Object/Binary.h"
20 #include "llvm/Object/Error.h"
21 #include "llvm/Support/Chrono.h"
22 #include "llvm/Support/Endian.h"
23 #include "llvm/Support/Error.h"
24 #include "llvm/Support/ErrorOr.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 #include <cassert>
31 #include <cstddef>
32 #include <cstdint>
33 #include <cstring>
34 #include <memory>
35 #include <string>
36 #include <system_error>
37 
38 using namespace llvm;
39 using namespace object;
40 using namespace llvm::support::endian;
41 
42 static const char *const Magic = "!<arch>\n";
43 static const char *const ThinMagic = "!<thin>\n";
44 
// Intentionally empty out-of-line definition.
// NOTE(review): presumably this exists to anchor Archive's vtable in this
// translation unit -- confirm against the declaration in Archive.h.
void Archive::anchor() {}
46 
47 static Error
malformedError(Twine Msg)48 malformedError(Twine Msg) {
49   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
50   return make_error<GenericBinaryError>(std::move(StringMsg),
51                                         object_error::parse_failed);
52 }
53 
ArchiveMemberHeader(const Archive * Parent,const char * RawHeaderPtr,uint64_t Size,Error * Err)54 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
55                                          const char *RawHeaderPtr,
56                                          uint64_t Size, Error *Err)
57     : Parent(Parent),
58       ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
59   if (RawHeaderPtr == nullptr)
60     return;
61   ErrorAsOutParameter ErrAsOutParam(Err);
62 
63   if (Size < sizeof(ArMemHdrType)) {
64     if (Err) {
65       std::string Msg("remaining size of archive too small for next archive "
66                       "member header ");
67       Expected<StringRef> NameOrErr = getName(Size);
68       if (!NameOrErr) {
69         consumeError(NameOrErr.takeError());
70         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
71         *Err = malformedError(Msg + "at offset " + Twine(Offset));
72       } else
73         *Err = malformedError(Msg + "for " + NameOrErr.get());
74     }
75     return;
76   }
77   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
78     if (Err) {
79       std::string Buf;
80       raw_string_ostream OS(Buf);
81       OS.write_escaped(StringRef(ArMemHdr->Terminator,
82                                  sizeof(ArMemHdr->Terminator)));
83       OS.flush();
84       std::string Msg("terminator characters in archive member \"" + Buf +
85                       "\" not the correct \"`\\n\" values for the archive "
86                       "member header ");
87       Expected<StringRef> NameOrErr = getName(Size);
88       if (!NameOrErr) {
89         consumeError(NameOrErr.takeError());
90         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
91         *Err = malformedError(Msg + "at offset " + Twine(Offset));
92       } else
93         *Err = malformedError(Msg + "for " + NameOrErr.get());
94     }
95     return;
96   }
97 }
98 
99 // This gets the raw name from the ArMemHdr->Name field and checks that it is
100 // valid for the kind of archive.  If it is not valid it returns an Error.
getRawName() const101 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
102   char EndCond;
103   auto Kind = Parent->kind();
104   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
105     if (ArMemHdr->Name[0] == ' ') {
106       uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
107                         Parent->getData().data();
108       return malformedError("name contains a leading space for archive member "
109                             "header at offset " + Twine(Offset));
110     }
111     EndCond = ' ';
112   }
113   else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
114     EndCond = ' ';
115   else
116     EndCond = '/';
117   StringRef::size_type end =
118       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
119   if (end == StringRef::npos)
120     end = sizeof(ArMemHdr->Name);
121   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
122   // Don't include the EndCond if there is one.
123   return StringRef(ArMemHdr->Name, end);
124 }
125 
126 // This gets the name looking up long names. Size is the size of the archive
127 // member including the header, so the size of any name following the header
128 // is checked to make sure it does not overflow.
getName(uint64_t Size) const129 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
130 
131   // This can be called from the ArchiveMemberHeader constructor when the
132   // archive header is truncated to produce an error message with the name.
133   // Make sure the name field is not truncated.
134   if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
135     uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
136                       Parent->getData().data();
137     return malformedError("archive header truncated before the name field "
138                           "for archive member header at offset " +
139                           Twine(ArchiveOffset));
140   }
141 
142   // The raw name itself can be invalid.
143   Expected<StringRef> NameOrErr = getRawName();
144   if (!NameOrErr)
145     return NameOrErr.takeError();
146   StringRef Name = NameOrErr.get();
147 
148   // Check if it's a special name.
149   if (Name[0] == '/') {
150     if (Name.size() == 1) // Linker member.
151       return Name;
152     if (Name.size() == 2 && Name[1] == '/') // String table.
153       return Name;
154     // It's a long name.
155     // Get the string table offset.
156     std::size_t StringOffset;
157     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
158       std::string Buf;
159       raw_string_ostream OS(Buf);
160       OS.write_escaped(Name.substr(1).rtrim(' '));
161       OS.flush();
162       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
163                                Parent->getData().data();
164       return malformedError("long name offset characters after the '/' are "
165                             "not all decimal numbers: '" + Buf + "' for "
166                             "archive member header at offset " +
167                             Twine(ArchiveOffset));
168     }
169 
170     // Verify it.
171     if (StringOffset >= Parent->getStringTable().size()) {
172       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
173                                Parent->getData().data();
174       return malformedError("long name offset " + Twine(StringOffset) + " past "
175                             "the end of the string table for archive member "
176                             "header at offset " + Twine(ArchiveOffset));
177     }
178 
179     // GNU long file names end with a "/\n".
180     if (Parent->kind() == Archive::K_GNU ||
181         Parent->kind() == Archive::K_GNU64) {
182       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
183       if (End == StringRef::npos || End < 1 ||
184           Parent->getStringTable()[End - 1] != '/') {
185         return malformedError("string table at long name offset " +
186                               Twine(StringOffset) + "not terminated");
187       }
188       return Parent->getStringTable().slice(StringOffset, End - 1);
189     }
190     return Parent->getStringTable().begin() + StringOffset;
191   }
192 
193   if (Name.startswith("#1/")) {
194     uint64_t NameLength;
195     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
196       std::string Buf;
197       raw_string_ostream OS(Buf);
198       OS.write_escaped(Name.substr(3).rtrim(' '));
199       OS.flush();
200       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
201                         Parent->getData().data();
202       return malformedError("long name length characters after the #1/ are "
203                             "not all decimal numbers: '" + Buf + "' for "
204                             "archive member header at offset " +
205                             Twine(ArchiveOffset));
206     }
207     if (getSizeOf() + NameLength > Size) {
208       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
209                         Parent->getData().data();
210       return malformedError("long name length: " + Twine(NameLength) +
211                             " extends past the end of the member or archive "
212                             "for archive member header at offset " +
213                             Twine(ArchiveOffset));
214     }
215     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
216                      NameLength).rtrim('\0');
217   }
218 
219   // It is not a long name so trim the blanks at the end of the name.
220   if (Name[Name.size() - 1] != '/')
221     return Name.rtrim(' ');
222 
223   // It's a simple name.
224   return Name.drop_back(1);
225 }
226 
getSize() const227 Expected<uint32_t> ArchiveMemberHeader::getSize() const {
228   uint32_t Ret;
229   if (StringRef(ArMemHdr->Size,
230                 sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
231     std::string Buf;
232     raw_string_ostream OS(Buf);
233     OS.write_escaped(StringRef(ArMemHdr->Size,
234                                sizeof(ArMemHdr->Size)).rtrim(" "));
235     OS.flush();
236     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
237                       Parent->getData().data();
238     return malformedError("characters in size field in archive header are not "
239                           "all decimal numbers: '" + Buf + "' for archive "
240                           "member header at offset " + Twine(Offset));
241   }
242   return Ret;
243 }
244 
getAccessMode() const245 Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
246   unsigned Ret;
247   if (StringRef(ArMemHdr->AccessMode,
248                 sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
249     std::string Buf;
250     raw_string_ostream OS(Buf);
251     OS.write_escaped(StringRef(ArMemHdr->AccessMode,
252                                sizeof(ArMemHdr->AccessMode)).rtrim(" "));
253     OS.flush();
254     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
255                       Parent->getData().data();
256     return malformedError("characters in AccessMode field in archive header "
257                           "are not all decimal numbers: '" + Buf + "' for the "
258                           "archive member header at offset " + Twine(Offset));
259   }
260   return static_cast<sys::fs::perms>(Ret);
261 }
262 
263 Expected<sys::TimePoint<std::chrono::seconds>>
getLastModified() const264 ArchiveMemberHeader::getLastModified() const {
265   unsigned Seconds;
266   if (StringRef(ArMemHdr->LastModified,
267                 sizeof(ArMemHdr->LastModified)).rtrim(' ')
268           .getAsInteger(10, Seconds)) {
269     std::string Buf;
270     raw_string_ostream OS(Buf);
271     OS.write_escaped(StringRef(ArMemHdr->LastModified,
272                                sizeof(ArMemHdr->LastModified)).rtrim(" "));
273     OS.flush();
274     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
275                       Parent->getData().data();
276     return malformedError("characters in LastModified field in archive header "
277                           "are not all decimal numbers: '" + Buf + "' for the "
278                           "archive member header at offset " + Twine(Offset));
279   }
280 
281   return sys::toTimePoint(Seconds);
282 }
283 
getUID() const284 Expected<unsigned> ArchiveMemberHeader::getUID() const {
285   unsigned Ret;
286   StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
287   if (User.empty())
288     return 0;
289   if (User.getAsInteger(10, Ret)) {
290     std::string Buf;
291     raw_string_ostream OS(Buf);
292     OS.write_escaped(User);
293     OS.flush();
294     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
295                       Parent->getData().data();
296     return malformedError("characters in UID field in archive header "
297                           "are not all decimal numbers: '" + Buf + "' for the "
298                           "archive member header at offset " + Twine(Offset));
299   }
300   return Ret;
301 }
302 
getGID() const303 Expected<unsigned> ArchiveMemberHeader::getGID() const {
304   unsigned Ret;
305   StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
306   if (Group.empty())
307     return 0;
308   if (Group.getAsInteger(10, Ret)) {
309     std::string Buf;
310     raw_string_ostream OS(Buf);
311     OS.write_escaped(Group);
312     OS.flush();
313     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
314                       Parent->getData().data();
315     return malformedError("characters in GID field in archive header "
316                           "are not all decimal numbers: '" + Buf + "' for the "
317                           "archive member header at offset " + Twine(Offset));
318   }
319   return Ret;
320 }
321 
/// Build a Child directly from an already delimited member range.
///
/// \p Data is stored as the member's bytes and \p StartOfFile as the offset
/// of the file contents within \p Data.  The header is constructed with a
/// null Error pointer, so header validation problems are not reported from
/// this constructor.
Archive::Child::Child(const Archive *Parent, StringRef Data,
                      uint16_t StartOfFile)
    : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
      Data(Data), StartOfFile(StartOfFile) {
}
327 
Child(const Archive * Parent,const char * Start,Error * Err)328 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
329     : Parent(Parent),
330       Header(Parent, Start,
331              Parent
332                ? Parent->getData().size() - (Start - Parent->getData().data())
333                : 0, Err) {
334   if (!Start)
335     return;
336 
337   // If we are pointed to real data, Start is not a nullptr, then there must be
338   // a non-null Err pointer available to report malformed data on.  Only in
339   // the case sentinel value is being constructed is Err is permitted to be a
340   // nullptr.
341   assert(Err && "Err can't be nullptr if Start is not a nullptr");
342 
343   ErrorAsOutParameter ErrAsOutParam(Err);
344 
345   // If there was an error in the construction of the Header
346   // then just return with the error now set.
347   if (*Err)
348     return;
349 
350   uint64_t Size = Header.getSizeOf();
351   Data = StringRef(Start, Size);
352   Expected<bool> isThinOrErr = isThinMember();
353   if (!isThinOrErr) {
354     *Err = isThinOrErr.takeError();
355     return;
356   }
357   bool isThin = isThinOrErr.get();
358   if (!isThin) {
359     Expected<uint64_t> MemberSize = getRawSize();
360     if (!MemberSize) {
361       *Err = MemberSize.takeError();
362       return;
363     }
364     Size += MemberSize.get();
365     Data = StringRef(Start, Size);
366   }
367 
368   // Setup StartOfFile and PaddingBytes.
369   StartOfFile = Header.getSizeOf();
370   // Don't include attached name.
371   Expected<StringRef> NameOrErr = getRawName();
372   if (!NameOrErr){
373     *Err = NameOrErr.takeError();
374     return;
375   }
376   StringRef Name = NameOrErr.get();
377   if (Name.startswith("#1/")) {
378     uint64_t NameSize;
379     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
380       std::string Buf;
381       raw_string_ostream OS(Buf);
382       OS.write_escaped(Name.substr(3).rtrim(' '));
383       OS.flush();
384       uint64_t Offset = Start - Parent->getData().data();
385       *Err = malformedError("long name length characters after the #1/ are "
386                             "not all decimal numbers: '" + Buf + "' for "
387                             "archive member header at offset " +
388                             Twine(Offset));
389       return;
390     }
391     StartOfFile += NameSize;
392   }
393 }
394 
getSize() const395 Expected<uint64_t> Archive::Child::getSize() const {
396   if (Parent->IsThin) {
397     Expected<uint32_t> Size = Header.getSize();
398     if (!Size)
399       return Size.takeError();
400     return Size.get();
401   }
402   return Data.size() - StartOfFile;
403 }
404 
/// Returns the size recorded in the member header's Size field, or the error
/// produced while parsing that field.
Expected<uint64_t> Archive::Child::getRawSize() const {
  return Header.getSize();
}
408 
isThinMember() const409 Expected<bool> Archive::Child::isThinMember() const {
410   Expected<StringRef> NameOrErr = Header.getRawName();
411   if (!NameOrErr)
412     return NameOrErr.takeError();
413   StringRef Name = NameOrErr.get();
414   return Parent->IsThin && Name != "/" && Name != "//";
415 }
416 
getFullName() const417 Expected<std::string> Archive::Child::getFullName() const {
418   Expected<bool> isThin = isThinMember();
419   if (!isThin)
420     return isThin.takeError();
421   assert(isThin.get());
422   Expected<StringRef> NameOrErr = getName();
423   if (!NameOrErr)
424     return NameOrErr.takeError();
425   StringRef Name = *NameOrErr;
426   if (sys::path::is_absolute(Name))
427     return Name;
428 
429   SmallString<128> FullName = sys::path::parent_path(
430       Parent->getMemoryBufferRef().getBufferIdentifier());
431   sys::path::append(FullName, Name);
432   return StringRef(FullName);
433 }
434 
getBuffer() const435 Expected<StringRef> Archive::Child::getBuffer() const {
436   Expected<bool> isThinOrErr = isThinMember();
437   if (!isThinOrErr)
438     return isThinOrErr.takeError();
439   bool isThin = isThinOrErr.get();
440   if (!isThin) {
441     Expected<uint32_t> Size = getSize();
442     if (!Size)
443       return Size.takeError();
444     return StringRef(Data.data() + StartOfFile, Size.get());
445   }
446   Expected<std::string> FullNameOrErr = getFullName();
447   if (!FullNameOrErr)
448     return FullNameOrErr.takeError();
449   const std::string &FullName = *FullNameOrErr;
450   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
451   if (std::error_code EC = Buf.getError())
452     return errorCodeToError(EC);
453   Parent->ThinBuffers.push_back(std::move(*Buf));
454   return Parent->ThinBuffers.back()->getBuffer();
455 }
456 
getNext() const457 Expected<Archive::Child> Archive::Child::getNext() const {
458   size_t SpaceToSkip = Data.size();
459   // If it's odd, add 1 to make it even.
460   if (SpaceToSkip & 1)
461     ++SpaceToSkip;
462 
463   const char *NextLoc = Data.data() + SpaceToSkip;
464 
465   // Check to see if this is at the end of the archive.
466   if (NextLoc == Parent->Data.getBufferEnd())
467     return Child(nullptr, nullptr, nullptr);
468 
469   // Check to see if this is past the end of the archive.
470   if (NextLoc > Parent->Data.getBufferEnd()) {
471     std::string Msg("offset to next archive member past the end of the archive "
472                     "after member ");
473     Expected<StringRef> NameOrErr = getName();
474     if (!NameOrErr) {
475       consumeError(NameOrErr.takeError());
476       uint64_t Offset = Data.data() - Parent->getData().data();
477       return malformedError(Msg + "at offset " + Twine(Offset));
478     } else
479       return malformedError(Msg + NameOrErr.get());
480   }
481 
482   Error Err = Error::success();
483   Child Ret(Parent, NextLoc, &Err);
484   if (Err)
485     return std::move(Err);
486   return Ret;
487 }
488 
getChildOffset() const489 uint64_t Archive::Child::getChildOffset() const {
490   const char *a = Parent->Data.getBuffer().data();
491   const char *c = Data.data();
492   uint64_t offset = c - a;
493   return offset;
494 }
495 
getName() const496 Expected<StringRef> Archive::Child::getName() const {
497   Expected<uint64_t> RawSizeOrErr = getRawSize();
498   if (!RawSizeOrErr)
499     return RawSizeOrErr.takeError();
500   uint64_t RawSize = RawSizeOrErr.get();
501   Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
502   if (!NameOrErr)
503     return NameOrErr.takeError();
504   StringRef Name = NameOrErr.get();
505   return Name;
506 }
507 
getMemoryBufferRef() const508 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
509   Expected<StringRef> NameOrErr = getName();
510   if (!NameOrErr)
511     return NameOrErr.takeError();
512   StringRef Name = NameOrErr.get();
513   Expected<StringRef> Buf = getBuffer();
514   if (!Buf)
515     return Buf.takeError();
516   return MemoryBufferRef(*Buf, Name);
517 }
518 
519 Expected<std::unique_ptr<Binary>>
getAsBinary(LLVMContext * Context) const520 Archive::Child::getAsBinary(LLVMContext *Context) const {
521   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
522   if (!BuffOrErr)
523     return BuffOrErr.takeError();
524 
525   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
526   if (BinaryOrErr)
527     return std::move(*BinaryOrErr);
528   return BinaryOrErr.takeError();
529 }
530 
/// Construct an Archive over \p Source.  Returns the new Archive, or the
/// Error set by the Archive constructor if construction failed.
Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
  // The constructor reports failure through Err rather than by throwing.
  Error Err = Error::success();
  std::unique_ptr<Archive> Ret(new Archive(Source, Err));
  if (Err)
    return std::move(Err);
  return std::move(Ret);
}
538 
/// Remember \p C's Data and StartOfFile as FirstRegularData and
/// FirstRegularStartOfFile.  child_begin() rebuilds its starting Child from
/// these two values when asked to skip the internal members.
void Archive::setFirstRegular(const Child &C) {
  FirstRegularData = C.Data;
  FirstRegularStartOfFile = C.StartOfFile;
}
543 
/// Parse the leading special members of the archive in \p Source.
///
/// Checks the magic (regular or thin), then walks the first few members to
/// determine Format, locate the symbol table and string table if present, and
/// record the first regular member via setFirstRegular().  Any failure is
/// reported through \p Err.
Archive::Archive(MemoryBufferRef Source, Error &Err)
    : Binary(Binary::ID_Archive, Source) {
  ErrorAsOutParameter ErrAsOutParam(&Err);
  StringRef Buffer = Data.getBuffer();
  // Check for sufficient magic.
  if (Buffer.startswith(ThinMagic)) {
    IsThin = true;
  } else if (Buffer.startswith(Magic)) {
    IsThin = false;
  } else {
    // Reached both when the buffer is shorter than the magic and when it
    // starts with something else.
    Err = make_error<GenericBinaryError>("File too small to be an archive",
                                         object_error::invalid_file_type);
    return;
  }

  // Make sure Format is initialized before any call to
  // ArchiveMemberHeader::getName() is made.  This could be a valid empty
  // archive, which is the same in all formats.  So claiming it to be GNU is
  // fine, if not totally correct, before we look for a string table or table
  // of contents.
  Format = K_GNU;

  // Get the special members.
  child_iterator I = child_begin(Err, false);
  if (Err)
    return;
  child_iterator E = child_end();

  // See if this is a valid empty archive and if so return.
  if (I == E) {
    Err = Error::success();
    return;
  }
  const Child *C = &*I;

  // Advance I to the next member and refresh C.  Returns true if advancing
  // left an error in Err, in which case the caller must bail out.
  auto Increment = [&]() {
    ++I;
    if (Err)
      return true;
    C = &*I;
    return false;
  };

  Expected<StringRef> NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  StringRef Name = NameOrErr.get();

  // Below is the pattern that is used to figure out the archive format
  // GNU archive format
  //  First member : / (may exist, if it exists, points to the symbol table )
  //  Second member : // (may exist, if it exists, points to the string table)
  //  Note : The string table is used if the filename exceeds 15 characters
  // BSD archive format
  //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  //  There is no string table, if the filename exceeds 15 characters or has a
  //  embedded space, the filename has #1/<size>, The size represents the size
  //  of the filename that needs to be read after the archive header
  // COFF archive format
  //  First member : /
  //  Second member : / (provides a directory of symbols)
  //  Third member : // (may exist, if it exists, contains the string table)
  //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  //  even if the string table is empty. However, lib.exe does not in fact
  //  seem to create the third member if there's no member whose filename
  //  exceeds 15 characters. So the third member is optional.

  // BSD / Darwin64 symbol table stored under a short name.
  if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
    if (Name == "__.SYMDEF")
      Format = K_BSD;
    else // Name == "__.SYMDEF_64"
      Format = K_DARWIN64;
    // We know that the symbol table is not an external file, but we still must
    // check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);

    Err = Error::success();
    return;
  }

  // BSD long-name member: it may be the symbol table or already the first
  // regular member.
  if (Name.startswith("#1/")) {
    Format = K_BSD;
    // We know this is BSD, so getName will work since there is no string table.
    Expected<StringRef> NameOrErr = C->getName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
    if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    }
    else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
      Format = K_DARWIN64;
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    }
    setFirstRegular(*C);
    return;
  }

  // MIPS 64-bit ELF archives use a special format of a symbol table.
  // This format is marked by `ar_name` field equals to "/SYM64/".
  // For detailed description see page 96 in the following document:
  // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf

  bool has64SymTable = false;
  if (Name == "/" || Name == "/SYM64/") {
    // We know that the symbol table is not an external file, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Name == "/SYM64/")
      has64SymTable = true;

    if (Increment())
      return;
    // The symbol table was the only member.
    if (I == E) {
      Err = Error::success();
      return;
    }
    Expected<StringRef> NameOrErr = C->getRawName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
  }

  // GNU string table.
  if (Name == "//") {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // An ordinary member name: GNU archive without a string table.
  if (Name[0] != '/') {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // Any other name starting with '/' here must be a second "/" member.
  if (Name != "/") {
    Err = errorCodeToError(object_error::parse_failed);
    return;
  }

  // A second "/" member means COFF; it replaces the first as SymbolTable.
  Format = K_COFF;
  // We know that the symbol table is not an external file, but we still
  // must check any Expected<> return value.
  Expected<StringRef> BufOrErr = C->getBuffer();
  if (!BufOrErr) {
    Err = BufOrErr.takeError();
    return;
  }
  SymbolTable = BufOrErr.get();

  if (Increment())
    return;

  if (I == E) {
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  Name = NameOrErr.get();

  // Optional COFF string table.
  if (Name == "//") {
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
  }

  setFirstRegular(*C);
  Err = Error::success();
}
775 
child_begin(Error & Err,bool SkipInternal) const776 Archive::child_iterator Archive::child_begin(Error &Err,
777                                              bool SkipInternal) const {
778   if (isEmpty())
779     return child_end();
780 
781   if (SkipInternal)
782     return child_iterator(Child(this, FirstRegularData,
783                                 FirstRegularStartOfFile),
784                           &Err);
785 
786   const char *Loc = Data.getBufferStart() + strlen(Magic);
787   Child C(this, Loc, &Err);
788   if (Err)
789     return child_end();
790   return child_iterator(C, &Err);
791 }
792 
/// Returns the end iterator: it wraps a Child built from all-null arguments
/// (the sentinel value) and carries no Error pointer.
Archive::child_iterator Archive::child_end() const {
  return child_iterator(Child(nullptr, nullptr, nullptr), nullptr);
}
796 
/// Returns the symbol's name: the string that starts StringIndex bytes into
/// the parent's symbol table.
/// NOTE(review): the StringRef is built from a bare pointer, so its length is
/// presumably found by scanning for a NUL byte -- confirm the symbol table is
/// always NUL-terminated at this point.
StringRef Archive::Symbol::getName() const {
  return Parent->getSymbolTable().begin() + StringIndex;
}
800 
getMember() const801 Expected<Archive::Child> Archive::Symbol::getMember() const {
802   const char *Buf = Parent->getSymbolTable().begin();
803   const char *Offsets = Buf;
804   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
805     Offsets += sizeof(uint64_t);
806   else
807     Offsets += sizeof(uint32_t);
808   uint64_t Offset = 0;
809   if (Parent->kind() == K_GNU) {
810     Offset = read32be(Offsets + SymbolIndex * 4);
811   } else if (Parent->kind() == K_GNU64) {
812     Offset = read64be(Offsets + SymbolIndex * 8);
813   } else if (Parent->kind() == K_BSD) {
814     // The SymbolIndex is an index into the ranlib structs that start at
815     // Offsets (the first uint32_t is the number of bytes of the ranlib
816     // structs).  The ranlib structs are a pair of uint32_t's the first
817     // being a string table offset and the second being the offset into
818     // the archive of the member that defines the symbol.  Which is what
819     // is needed here.
820     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
821   } else if (Parent->kind() == K_DARWIN64) {
822     // The SymbolIndex is an index into the ranlib_64 structs that start at
823     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
824     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
825     // being a string table offset and the second being the offset into
826     // the archive of the member that defines the symbol.  Which is what
827     // is needed here.
828     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
829   } else {
830     // Skip offsets.
831     uint32_t MemberCount = read32le(Buf);
832     Buf += MemberCount * 4 + 4;
833 
834     uint32_t SymbolCount = read32le(Buf);
835     if (SymbolIndex >= SymbolCount)
836       return errorCodeToError(object_error::parse_failed);
837 
838     // Skip SymbolCount to get to the indices table.
839     const char *Indices = Buf + 4;
840 
841     // Get the index of the offset in the file member offset table for this
842     // symbol.
843     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
844     // Subtract 1 since OffsetIndex is 1 based.
845     --OffsetIndex;
846 
847     if (OffsetIndex >= MemberCount)
848       return errorCodeToError(object_error::parse_failed);
849 
850     Offset = read32le(Offsets + OffsetIndex * 4);
851   }
852 
853   const char *Loc = Parent->getData().begin() + Offset;
854   Error Err = Error::success();
855   Child C(Parent, Loc, &Err);
856   if (Err)
857     return std::move(Err);
858   return C;
859 }
860 
getNext() const861 Archive::Symbol Archive::Symbol::getNext() const {
862   Symbol t(*this);
863   if (Parent->kind() == K_BSD) {
864     // t.StringIndex is an offset from the start of the __.SYMDEF or
865     // "__.SYMDEF SORTED" member into the string table for the ranlib
866     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
867     // offset in the string table for t.SymbolIndex+1 we subtract the
868     // its offset from the start of the string table for t.SymbolIndex
869     // and add the offset of the string table for t.SymbolIndex+1.
870 
871     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
872     // which is the number of bytes of ranlib structs that follow.  The ranlib
873     // structs are a pair of uint32_t's the first being a string table offset
874     // and the second being the offset into the archive of the member that
875     // define the symbol. After that the next uint32_t is the byte count of
876     // the string table followed by the string table.
877     const char *Buf = Parent->getSymbolTable().begin();
878     uint32_t RanlibCount = 0;
879     RanlibCount = read32le(Buf) / 8;
880     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
881     // don't change the t.StringIndex as we don't want to reference a ranlib
882     // past RanlibCount.
883     if (t.SymbolIndex + 1 < RanlibCount) {
884       const char *Ranlibs = Buf + 4;
885       uint32_t CurRanStrx = 0;
886       uint32_t NextRanStrx = 0;
887       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
888       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
889       t.StringIndex -= CurRanStrx;
890       t.StringIndex += NextRanStrx;
891     }
892   } else {
893     // Go to one past next null.
894     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
895   }
896   ++t.SymbolIndex;
897   return t;
898 }
899 
symbol_begin() const900 Archive::symbol_iterator Archive::symbol_begin() const {
901   if (!hasSymbolTable())
902     return symbol_iterator(Symbol(this, 0, 0));
903 
904   const char *buf = getSymbolTable().begin();
905   if (kind() == K_GNU) {
906     uint32_t symbol_count = 0;
907     symbol_count = read32be(buf);
908     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
909   } else if (kind() == K_GNU64) {
910     uint64_t symbol_count = read64be(buf);
911     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
912   } else if (kind() == K_BSD) {
913     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
914     // which is the number of bytes of ranlib structs that follow.  The ranlib
915     // structs are a pair of uint32_t's the first being a string table offset
916     // and the second being the offset into the archive of the member that
917     // define the symbol. After that the next uint32_t is the byte count of
918     // the string table followed by the string table.
919     uint32_t ranlib_count = 0;
920     ranlib_count = read32le(buf) / 8;
921     const char *ranlibs = buf + 4;
922     uint32_t ran_strx = 0;
923     ran_strx = read32le(ranlibs);
924     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
925     // Skip the byte count of the string table.
926     buf += sizeof(uint32_t);
927     buf += ran_strx;
928   } else if (kind() == K_DARWIN64) {
929     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
930     // which is the number of bytes of ranlib_64 structs that follow.  The
931     // ranlib_64 structs are a pair of uint64_t's the first being a string
932     // table offset and the second being the offset into the archive of the
933     // member that define the symbol. After that the next uint64_t is the byte
934     // count of the string table followed by the string table.
935     uint64_t ranlib_count = 0;
936     ranlib_count = read64le(buf) / 16;
937     const char *ranlibs = buf + 8;
938     uint64_t ran_strx = 0;
939     ran_strx = read64le(ranlibs);
940     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
941     // Skip the byte count of the string table.
942     buf += sizeof(uint64_t);
943     buf += ran_strx;
944   } else {
945     uint32_t member_count = 0;
946     uint32_t symbol_count = 0;
947     member_count = read32le(buf);
948     buf += 4 + (member_count * 4); // Skip offsets.
949     symbol_count = read32le(buf);
950     buf += 4 + (symbol_count * 2); // Skip indices.
951   }
952   uint32_t string_start_offset = buf - getSymbolTable().begin();
953   return symbol_iterator(Symbol(this, 0, string_start_offset));
954 }
955 
symbol_end() const956 Archive::symbol_iterator Archive::symbol_end() const {
957   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
958 }
959 
getNumberOfSymbols() const960 uint32_t Archive::getNumberOfSymbols() const {
961   if (!hasSymbolTable())
962     return 0;
963   const char *buf = getSymbolTable().begin();
964   if (kind() == K_GNU)
965     return read32be(buf);
966   if (kind() == K_GNU64)
967     return read64be(buf);
968   if (kind() == K_BSD)
969     return read32le(buf) / 8;
970   if (kind() == K_DARWIN64)
971     return read64le(buf) / 16;
972   uint32_t member_count = 0;
973   member_count = read32le(buf);
974   buf += 4 + (member_count * 4); // Skip offsets.
975   return read32le(buf);
976 }
977 
findSym(StringRef name) const978 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
979   Archive::symbol_iterator bs = symbol_begin();
980   Archive::symbol_iterator es = symbol_end();
981 
982   for (; bs != es; ++bs) {
983     StringRef SymName = bs->getName();
984     if (SymName == name) {
985       if (auto MemberOrErr = bs->getMember())
986         return Child(*MemberOrErr);
987       else
988         return MemberOrErr.takeError();
989     }
990   }
991   return Optional<Child>();
992 }
993 
994 // Returns true if archive file contains no member file.
isEmpty() const995 bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
996 
hasSymbolTable() const997 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
998