1 //===- ArchiveWriter.cpp - ar File Format implementation --------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the writeArchive function.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Object/ArchiveWriter.h"
14 #include "llvm/ADT/ArrayRef.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/BinaryFormat/Magic.h"
18 #include "llvm/IR/LLVMContext.h"
19 #include "llvm/Object/Archive.h"
20 #include "llvm/Object/Error.h"
21 #include "llvm/Object/ObjectFile.h"
22 #include "llvm/Object/SymbolicFile.h"
23 #include "llvm/Support/Alignment.h"
24 #include "llvm/Support/EndianStream.h"
25 #include "llvm/Support/Errc.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/SmallVectorMemoryBuffer.h"
30 #include "llvm/Support/ToolOutputFile.h"
31 #include "llvm/Support/raw_ostream.h"
32
33 #include <map>
34
35 #if !defined(_MSC_VER) && !defined(__MINGW32__)
36 #include <unistd.h>
37 #else
38 #include <io.h>
39 #endif
40
41 using namespace llvm;
42
NewArchiveMember(MemoryBufferRef BufRef)43 NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef)
44 : Buf(MemoryBuffer::getMemBuffer(BufRef, false)),
45 MemberName(BufRef.getBufferIdentifier()) {}
46
47 Expected<NewArchiveMember>
getOldMember(const object::Archive::Child & OldMember,bool Deterministic)48 NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
49 bool Deterministic) {
50 Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef();
51 if (!BufOrErr)
52 return BufOrErr.takeError();
53
54 NewArchiveMember M;
55 M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false);
56 M.MemberName = M.Buf->getBufferIdentifier();
57 if (!Deterministic) {
58 auto ModTimeOrErr = OldMember.getLastModified();
59 if (!ModTimeOrErr)
60 return ModTimeOrErr.takeError();
61 M.ModTime = ModTimeOrErr.get();
62 Expected<unsigned> UIDOrErr = OldMember.getUID();
63 if (!UIDOrErr)
64 return UIDOrErr.takeError();
65 M.UID = UIDOrErr.get();
66 Expected<unsigned> GIDOrErr = OldMember.getGID();
67 if (!GIDOrErr)
68 return GIDOrErr.takeError();
69 M.GID = GIDOrErr.get();
70 Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode();
71 if (!AccessModeOrErr)
72 return AccessModeOrErr.takeError();
73 M.Perms = AccessModeOrErr.get();
74 }
75 return std::move(M);
76 }
77
getFile(StringRef FileName,bool Deterministic)78 Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
79 bool Deterministic) {
80 sys::fs::file_status Status;
81 auto FDOrErr = sys::fs::openNativeFileForRead(FileName);
82 if (!FDOrErr)
83 return FDOrErr.takeError();
84 sys::fs::file_t FD = *FDOrErr;
85 assert(FD != sys::fs::kInvalidFile);
86
87 if (auto EC = sys::fs::status(FD, Status))
88 return errorCodeToError(EC);
89
90 // Opening a directory doesn't make sense. Let it fail.
91 // Linux cannot open directories with open(2), although
92 // cygwin and *bsd can.
93 if (Status.type() == sys::fs::file_type::directory_file)
94 return errorCodeToError(make_error_code(errc::is_a_directory));
95
96 ErrorOr<std::unique_ptr<MemoryBuffer>> MemberBufferOrErr =
97 MemoryBuffer::getOpenFile(FD, FileName, Status.getSize(), false);
98 if (!MemberBufferOrErr)
99 return errorCodeToError(MemberBufferOrErr.getError());
100
101 if (auto EC = sys::fs::closeFile(FD))
102 return errorCodeToError(EC);
103
104 NewArchiveMember M;
105 M.Buf = std::move(*MemberBufferOrErr);
106 M.MemberName = M.Buf->getBufferIdentifier();
107 if (!Deterministic) {
108 M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>(
109 Status.getLastModificationTime());
110 M.UID = Status.getUser();
111 M.GID = Status.getGroup();
112 M.Perms = Status.permissions();
113 }
114 return std::move(M);
115 }
116
117 template <typename T>
printWithSpacePadding(raw_ostream & OS,T Data,unsigned Size)118 static void printWithSpacePadding(raw_ostream &OS, T Data, unsigned Size) {
119 uint64_t OldPos = OS.tell();
120 OS << Data;
121 unsigned SizeSoFar = OS.tell() - OldPos;
122 assert(SizeSoFar <= Size && "Data doesn't fit in Size");
123 OS.indent(Size - SizeSoFar);
124 }
125
isDarwin(object::Archive::Kind Kind)126 static bool isDarwin(object::Archive::Kind Kind) {
127 return Kind == object::Archive::K_DARWIN ||
128 Kind == object::Archive::K_DARWIN64;
129 }
130
isBSDLike(object::Archive::Kind Kind)131 static bool isBSDLike(object::Archive::Kind Kind) {
132 switch (Kind) {
133 case object::Archive::K_GNU:
134 case object::Archive::K_GNU64:
135 return false;
136 case object::Archive::K_BSD:
137 case object::Archive::K_DARWIN:
138 case object::Archive::K_DARWIN64:
139 return true;
140 case object::Archive::K_COFF:
141 break;
142 }
143 llvm_unreachable("not supported for writting");
144 }
145
146 template <class T>
print(raw_ostream & Out,object::Archive::Kind Kind,T Val)147 static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) {
148 support::endian::write(Out, Val,
149 isBSDLike(Kind) ? support::little : support::big);
150 }
151
printRestOfMemberHeader(raw_ostream & Out,const sys::TimePoint<std::chrono::seconds> & ModTime,unsigned UID,unsigned GID,unsigned Perms,uint64_t Size)152 static void printRestOfMemberHeader(
153 raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime,
154 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) {
155 printWithSpacePadding(Out, sys::toTimeT(ModTime), 12);
156
157 // The format has only 6 chars for uid and gid. Truncate if the provided
158 // values don't fit.
159 printWithSpacePadding(Out, UID % 1000000, 6);
160 printWithSpacePadding(Out, GID % 1000000, 6);
161
162 printWithSpacePadding(Out, format("%o", Perms), 8);
163 printWithSpacePadding(Out, Size, 10);
164 Out << "`\n";
165 }
166
167 static void
printGNUSmallMemberHeader(raw_ostream & Out,StringRef Name,const sys::TimePoint<std::chrono::seconds> & ModTime,unsigned UID,unsigned GID,unsigned Perms,uint64_t Size)168 printGNUSmallMemberHeader(raw_ostream &Out, StringRef Name,
169 const sys::TimePoint<std::chrono::seconds> &ModTime,
170 unsigned UID, unsigned GID, unsigned Perms,
171 uint64_t Size) {
172 printWithSpacePadding(Out, Twine(Name) + "/", 16);
173 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
174 }
175
176 static void
printBSDMemberHeader(raw_ostream & Out,uint64_t Pos,StringRef Name,const sys::TimePoint<std::chrono::seconds> & ModTime,unsigned UID,unsigned GID,unsigned Perms,uint64_t Size)177 printBSDMemberHeader(raw_ostream &Out, uint64_t Pos, StringRef Name,
178 const sys::TimePoint<std::chrono::seconds> &ModTime,
179 unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) {
180 uint64_t PosAfterHeader = Pos + 60 + Name.size();
181 // Pad so that even 64 bit object files are aligned.
182 unsigned Pad = offsetToAlignment(PosAfterHeader, Align(8));
183 unsigned NameWithPadding = Name.size() + Pad;
184 printWithSpacePadding(Out, Twine("#1/") + Twine(NameWithPadding), 16);
185 printRestOfMemberHeader(Out, ModTime, UID, GID, Perms,
186 NameWithPadding + Size);
187 Out << Name;
188 while (Pad--)
189 Out.write(uint8_t(0));
190 }
191
useStringTable(bool Thin,StringRef Name)192 static bool useStringTable(bool Thin, StringRef Name) {
193 return Thin || Name.size() >= 16 || Name.contains('/');
194 }
195
is64BitKind(object::Archive::Kind Kind)196 static bool is64BitKind(object::Archive::Kind Kind) {
197 switch (Kind) {
198 case object::Archive::K_GNU:
199 case object::Archive::K_BSD:
200 case object::Archive::K_DARWIN:
201 case object::Archive::K_COFF:
202 return false;
203 case object::Archive::K_DARWIN64:
204 case object::Archive::K_GNU64:
205 return true;
206 }
207 llvm_unreachable("not supported for writting");
208 }
209
210 static void
printMemberHeader(raw_ostream & Out,uint64_t Pos,raw_ostream & StringTable,StringMap<uint64_t> & MemberNames,object::Archive::Kind Kind,bool Thin,const NewArchiveMember & M,sys::TimePoint<std::chrono::seconds> ModTime,uint64_t Size)211 printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable,
212 StringMap<uint64_t> &MemberNames, object::Archive::Kind Kind,
213 bool Thin, const NewArchiveMember &M,
214 sys::TimePoint<std::chrono::seconds> ModTime, uint64_t Size) {
215 if (isBSDLike(Kind))
216 return printBSDMemberHeader(Out, Pos, M.MemberName, ModTime, M.UID, M.GID,
217 M.Perms, Size);
218 if (!useStringTable(Thin, M.MemberName))
219 return printGNUSmallMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
220 M.Perms, Size);
221 Out << '/';
222 uint64_t NamePos;
223 if (Thin) {
224 NamePos = StringTable.tell();
225 StringTable << M.MemberName << "/\n";
226 } else {
227 auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
228 if (Insertion.second) {
229 Insertion.first->second = StringTable.tell();
230 StringTable << M.MemberName << "/\n";
231 }
232 NamePos = Insertion.first->second;
233 }
234 printWithSpacePadding(Out, NamePos, 15);
235 printRestOfMemberHeader(Out, ModTime, M.UID, M.GID, M.Perms, Size);
236 }
237
238 namespace {
239 struct MemberData {
240 std::vector<unsigned> Symbols;
241 std::string Header;
242 StringRef Data;
243 StringRef Padding;
244 };
245 } // namespace
246
computeStringTable(StringRef Names)247 static MemberData computeStringTable(StringRef Names) {
248 unsigned Size = Names.size();
249 unsigned Pad = offsetToAlignment(Size, Align(2));
250 std::string Header;
251 raw_string_ostream Out(Header);
252 printWithSpacePadding(Out, "//", 48);
253 printWithSpacePadding(Out, Size + Pad, 10);
254 Out << "`\n";
255 Out.flush();
256 return {{}, std::move(Header), Names, Pad ? "\n" : ""};
257 }
258
now(bool Deterministic)259 static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) {
260 using namespace std::chrono;
261
262 if (!Deterministic)
263 return time_point_cast<seconds>(system_clock::now());
264 return sys::TimePoint<seconds>();
265 }
266
isArchiveSymbol(const object::BasicSymbolRef & S)267 static bool isArchiveSymbol(const object::BasicSymbolRef &S) {
268 Expected<uint32_t> SymFlagsOrErr = S.getFlags();
269 if (!SymFlagsOrErr)
270 // TODO: Actually report errors helpfully.
271 report_fatal_error(SymFlagsOrErr.takeError());
272 if (*SymFlagsOrErr & object::SymbolRef::SF_FormatSpecific)
273 return false;
274 if (!(*SymFlagsOrErr & object::SymbolRef::SF_Global))
275 return false;
276 if (*SymFlagsOrErr & object::SymbolRef::SF_Undefined)
277 return false;
278 return true;
279 }
280
printNBits(raw_ostream & Out,object::Archive::Kind Kind,uint64_t Val)281 static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
282 uint64_t Val) {
283 if (is64BitKind(Kind))
284 print<uint64_t>(Out, Kind, Val);
285 else
286 print<uint32_t>(Out, Kind, Val);
287 }
288
computeSymbolTableSize(object::Archive::Kind Kind,uint64_t NumSyms,uint64_t OffsetSize,StringRef StringTable,uint32_t * Padding=nullptr)289 static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
290 uint64_t NumSyms, uint64_t OffsetSize,
291 StringRef StringTable,
292 uint32_t *Padding = nullptr) {
293 assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize");
294 uint64_t Size = OffsetSize; // Number of entries
295 if (isBSDLike(Kind))
296 Size += NumSyms * OffsetSize * 2; // Table
297 else
298 Size += NumSyms * OffsetSize; // Table
299 if (isBSDLike(Kind))
300 Size += OffsetSize; // byte count
301 Size += StringTable.size();
302 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
303 // least 4-byte aligned for 32-bit content. Opt for the larger encoding
304 // uniformly.
305 // We do this for all bsd formats because it simplifies aligning members.
306 uint32_t Pad = offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
307 Size += Pad;
308 if (Padding)
309 *Padding = Pad;
310 return Size;
311 }
312
writeSymbolTableHeader(raw_ostream & Out,object::Archive::Kind Kind,bool Deterministic,uint64_t Size)313 static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
314 bool Deterministic, uint64_t Size) {
315 if (isBSDLike(Kind)) {
316 const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
317 printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
318 Size);
319 } else {
320 const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
321 printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
322 }
323 }
324
writeSymbolTable(raw_ostream & Out,object::Archive::Kind Kind,bool Deterministic,ArrayRef<MemberData> Members,StringRef StringTable)325 static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
326 bool Deterministic, ArrayRef<MemberData> Members,
327 StringRef StringTable) {
328 // We don't write a symbol table on an archive with no members -- except on
329 // Darwin, where the linker will abort unless the archive has a symbol table.
330 if (StringTable.empty() && !isDarwin(Kind))
331 return;
332
333 unsigned NumSyms = 0;
334 for (const MemberData &M : Members)
335 NumSyms += M.Symbols.size();
336
337 uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
338 uint32_t Pad;
339 uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad);
340 writeSymbolTableHeader(Out, Kind, Deterministic, Size);
341
342 uint64_t Pos = Out.tell() + Size;
343
344 if (isBSDLike(Kind))
345 printNBits(Out, Kind, NumSyms * 2 * OffsetSize);
346 else
347 printNBits(Out, Kind, NumSyms);
348
349 for (const MemberData &M : Members) {
350 for (unsigned StringOffset : M.Symbols) {
351 if (isBSDLike(Kind))
352 printNBits(Out, Kind, StringOffset);
353 printNBits(Out, Kind, Pos); // member offset
354 }
355 Pos += M.Header.size() + M.Data.size() + M.Padding.size();
356 }
357
358 if (isBSDLike(Kind))
359 // byte count of the string table
360 printNBits(Out, Kind, StringTable.size());
361 Out << StringTable;
362
363 while (Pad--)
364 Out.write(uint8_t(0));
365 }
366
367 static Expected<std::vector<unsigned>>
getSymbols(MemoryBufferRef Buf,raw_ostream & SymNames,bool & HasObject)368 getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
369 std::vector<unsigned> Ret;
370
371 // In the scenario when LLVMContext is populated SymbolicFile will contain a
372 // reference to it, thus SymbolicFile should be destroyed first.
373 LLVMContext Context;
374 std::unique_ptr<object::SymbolicFile> Obj;
375
376 const file_magic Type = identify_magic(Buf.getBuffer());
377 // Treat unsupported file types as having no symbols.
378 if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
379 return Ret;
380 if (Type == file_magic::bitcode) {
381 auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
382 Buf, file_magic::bitcode, &Context);
383 if (!ObjOrErr)
384 return ObjOrErr.takeError();
385 Obj = std::move(*ObjOrErr);
386 } else {
387 auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
388 if (!ObjOrErr)
389 return ObjOrErr.takeError();
390 Obj = std::move(*ObjOrErr);
391 }
392
393 HasObject = true;
394 for (const object::BasicSymbolRef &S : Obj->symbols()) {
395 if (!isArchiveSymbol(S))
396 continue;
397 Ret.push_back(SymNames.tell());
398 if (Error E = S.printName(SymNames))
399 return std::move(E);
400 SymNames << '\0';
401 }
402 return Ret;
403 }
404
405 static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream & StringTable,raw_ostream & SymNames,object::Archive::Kind Kind,bool Thin,bool Deterministic,bool NeedSymbols,ArrayRef<NewArchiveMember> NewMembers)406 computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
407 object::Archive::Kind Kind, bool Thin, bool Deterministic,
408 bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) {
409 static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
410
411 // This ignores the symbol table, but we only need the value mod 8 and the
412 // symbol table is aligned to be a multiple of 8 bytes
413 uint64_t Pos = 0;
414
415 std::vector<MemberData> Ret;
416 bool HasObject = false;
417
418 // Deduplicate long member names in the string table and reuse earlier name
419 // offsets. This especially saves space for COFF Import libraries where all
420 // members have the same name.
421 StringMap<uint64_t> MemberNames;
422
423 // UniqueTimestamps is a special case to improve debugging on Darwin:
424 //
425 // The Darwin linker does not link debug info into the final
426 // binary. Instead, it emits entries of type N_OSO in in the output
427 // binary's symbol table, containing references to the linked-in
428 // object files. Using that reference, the debugger can read the
429 // debug data directly from the object files. Alternatively, an
430 // invocation of 'dsymutil' will link the debug data from the object
431 // files into a dSYM bundle, which can be loaded by the debugger,
432 // instead of the object files.
433 //
434 // For an object file, the N_OSO entries contain the absolute path
435 // path to the file, and the file's timestamp. For an object
436 // included in an archive, the path is formatted like
437 // "/absolute/path/to/archive.a(member.o)", and the timestamp is the
438 // archive member's timestamp, rather than the archive's timestamp.
439 //
440 // However, this doesn't always uniquely identify an object within
441 // an archive -- an archive file can have multiple entries with the
442 // same filename. (This will happen commonly if the original object
443 // files started in different directories.) The only way they get
444 // distinguished, then, is via the timestamp. But this process is
445 // unable to find the correct object file in the archive when there
446 // are two files of the same name and timestamp.
447 //
448 // Additionally, timestamp==0 is treated specially, and causes the
449 // timestamp to be ignored as a match criteria.
450 //
451 // That will "usually" work out okay when creating an archive not in
452 // deterministic timestamp mode, because the objects will probably
453 // have been created at different timestamps.
454 //
455 // To ameliorate this problem, in deterministic archive mode (which
456 // is the default), on Darwin we will emit a unique non-zero
457 // timestamp for each entry with a duplicated name. This is still
458 // deterministic: the only thing affecting that timestamp is the
459 // order of the files in the resultant archive.
460 //
461 // See also the functions that handle the lookup:
462 // in lldb: ObjectContainerBSDArchive::Archive::FindObject()
463 // in llvm/tools/dsymutil: BinaryHolder::GetArchiveMemberBuffers().
464 bool UniqueTimestamps = Deterministic && isDarwin(Kind);
465 std::map<StringRef, unsigned> FilenameCount;
466 if (UniqueTimestamps) {
467 for (const NewArchiveMember &M : NewMembers)
468 FilenameCount[M.MemberName]++;
469 for (auto &Entry : FilenameCount)
470 Entry.second = Entry.second > 1 ? 1 : 0;
471 }
472
473 for (const NewArchiveMember &M : NewMembers) {
474 std::string Header;
475 raw_string_ostream Out(Header);
476
477 MemoryBufferRef Buf = M.Buf->getMemBufferRef();
478 StringRef Data = Thin ? "" : Buf.getBuffer();
479
480 // ld64 expects the members to be 8-byte aligned for 64-bit content and at
481 // least 4-byte aligned for 32-bit content. Opt for the larger encoding
482 // uniformly. This matches the behaviour with cctools and ensures that ld64
483 // is happy with archives that we generate.
484 unsigned MemberPadding =
485 isDarwin(Kind) ? offsetToAlignment(Data.size(), Align(8)) : 0;
486 unsigned TailPadding =
487 offsetToAlignment(Data.size() + MemberPadding, Align(2));
488 StringRef Padding = StringRef(PaddingData, MemberPadding + TailPadding);
489
490 sys::TimePoint<std::chrono::seconds> ModTime;
491 if (UniqueTimestamps)
492 // Increment timestamp for each file of a given name.
493 ModTime = sys::toTimePoint(FilenameCount[M.MemberName]++);
494 else
495 ModTime = M.ModTime;
496
497 uint64_t Size = Buf.getBufferSize() + MemberPadding;
498 if (Size > object::Archive::MaxMemberSize) {
499 std::string StringMsg =
500 "File " + M.MemberName.str() + " exceeds size limit";
501 return make_error<object::GenericBinaryError>(
502 std::move(StringMsg), object::object_error::parse_failed);
503 }
504
505 printMemberHeader(Out, Pos, StringTable, MemberNames, Kind, Thin, M,
506 ModTime, Size);
507 Out.flush();
508
509 std::vector<unsigned> Symbols;
510 if (NeedSymbols) {
511 Expected<std::vector<unsigned>> SymbolsOrErr =
512 getSymbols(Buf, SymNames, HasObject);
513 if (auto E = SymbolsOrErr.takeError())
514 return std::move(E);
515 Symbols = std::move(*SymbolsOrErr);
516 }
517
518 Pos += Header.size() + Data.size() + Padding.size();
519 Ret.push_back({std::move(Symbols), std::move(Header), Data, Padding});
520 }
521 // If there are no symbols, emit an empty symbol table, to satisfy Solaris
522 // tools, older versions of which expect a symbol table in a non-empty
523 // archive, regardless of whether there are any symbols in it.
524 if (HasObject && SymNames.tell() == 0)
525 SymNames << '\0' << '\0' << '\0';
526 return Ret;
527 }
528
529 namespace llvm {
530
canonicalizePath(StringRef P)531 static ErrorOr<SmallString<128>> canonicalizePath(StringRef P) {
532 SmallString<128> Ret = P;
533 std::error_code Err = sys::fs::make_absolute(Ret);
534 if (Err)
535 return Err;
536 sys::path::remove_dots(Ret, /*removedotdot*/ true);
537 return Ret;
538 }
539
540 // Compute the relative path from From to To.
computeArchiveRelativePath(StringRef From,StringRef To)541 Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
542 ErrorOr<SmallString<128>> PathToOrErr = canonicalizePath(To);
543 ErrorOr<SmallString<128>> DirFromOrErr = canonicalizePath(From);
544 if (!PathToOrErr || !DirFromOrErr)
545 return errorCodeToError(std::error_code(errno, std::generic_category()));
546
547 const SmallString<128> &PathTo = *PathToOrErr;
548 const SmallString<128> &DirFrom = sys::path::parent_path(*DirFromOrErr);
549
550 // Can't construct a relative path between different roots
551 if (sys::path::root_name(PathTo) != sys::path::root_name(DirFrom))
552 return sys::path::convert_to_slash(PathTo);
553
554 // Skip common prefixes
555 auto FromTo =
556 std::mismatch(sys::path::begin(DirFrom), sys::path::end(DirFrom),
557 sys::path::begin(PathTo));
558 auto FromI = FromTo.first;
559 auto ToI = FromTo.second;
560
561 // Construct relative path
562 SmallString<128> Relative;
563 for (auto FromE = sys::path::end(DirFrom); FromI != FromE; ++FromI)
564 sys::path::append(Relative, sys::path::Style::posix, "..");
565
566 for (auto ToE = sys::path::end(PathTo); ToI != ToE; ++ToI)
567 sys::path::append(Relative, sys::path::Style::posix, *ToI);
568
569 return std::string(Relative.str());
570 }
571
writeArchiveToStream(raw_ostream & Out,ArrayRef<NewArchiveMember> NewMembers,bool WriteSymtab,object::Archive::Kind Kind,bool Deterministic,bool Thin)572 static Error writeArchiveToStream(raw_ostream &Out,
573 ArrayRef<NewArchiveMember> NewMembers,
574 bool WriteSymtab, object::Archive::Kind Kind,
575 bool Deterministic, bool Thin) {
576 assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
577
578 SmallString<0> SymNamesBuf;
579 raw_svector_ostream SymNames(SymNamesBuf);
580 SmallString<0> StringTableBuf;
581 raw_svector_ostream StringTable(StringTableBuf);
582
583 Expected<std::vector<MemberData>> DataOrErr =
584 computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic,
585 WriteSymtab, NewMembers);
586 if (Error E = DataOrErr.takeError())
587 return E;
588 std::vector<MemberData> &Data = *DataOrErr;
589
590 if (!StringTableBuf.empty())
591 Data.insert(Data.begin(), computeStringTable(StringTableBuf));
592
593 // We would like to detect if we need to switch to a 64-bit symbol table.
594 if (WriteSymtab) {
595 uint64_t MaxOffset = 8; // For the file signature.
596 uint64_t LastOffset = MaxOffset;
597 uint64_t NumSyms = 0;
598 for (const auto &M : Data) {
599 // Record the start of the member's offset
600 LastOffset = MaxOffset;
601 // Account for the size of each part associated with the member.
602 MaxOffset += M.Header.size() + M.Data.size() + M.Padding.size();
603 NumSyms += M.Symbols.size();
604 }
605
606 // We assume 32-bit offsets to see if 32-bit symbols are possible or not.
607 uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf);
608 auto computeSymbolTableHeaderSize =
609 [=] {
610 SmallString<0> TmpBuf;
611 raw_svector_ostream Tmp(TmpBuf);
612 writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize);
613 return TmpBuf.size();
614 };
615 LastOffset += computeSymbolTableHeaderSize() + SymtabSize;
616
617 // The SYM64 format is used when an archive's member offsets are larger than
618 // 32-bits can hold. The need for this shift in format is detected by
619 // writeArchive. To test this we need to generate a file with a member that
620 // has an offset larger than 32-bits but this demands a very slow test. To
621 // speed the test up we use this environment variable to pretend like the
622 // cutoff happens before 32-bits and instead happens at some much smaller
623 // value.
624 uint64_t Sym64Threshold = 1ULL << 32;
625 const char *Sym64Env = std::getenv("SYM64_THRESHOLD");
626 if (Sym64Env)
627 StringRef(Sym64Env).getAsInteger(10, Sym64Threshold);
628
629 // If LastOffset isn't going to fit in a 32-bit varible we need to switch
630 // to 64-bit. Note that the file can be larger than 4GB as long as the last
631 // member starts before the 4GB offset.
632 if (LastOffset >= Sym64Threshold) {
633 if (Kind == object::Archive::K_DARWIN)
634 Kind = object::Archive::K_DARWIN64;
635 else
636 Kind = object::Archive::K_GNU64;
637 }
638 }
639
640 if (Thin)
641 Out << "!<thin>\n";
642 else
643 Out << "!<arch>\n";
644
645 if (WriteSymtab)
646 writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf);
647
648 for (const MemberData &M : Data)
649 Out << M.Header << M.Data << M.Padding;
650
651 Out.flush();
652 return Error::success();
653 }
654
writeArchive(StringRef ArcName,ArrayRef<NewArchiveMember> NewMembers,bool WriteSymtab,object::Archive::Kind Kind,bool Deterministic,bool Thin,std::unique_ptr<MemoryBuffer> OldArchiveBuf)655 Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
656 bool WriteSymtab, object::Archive::Kind Kind,
657 bool Deterministic, bool Thin,
658 std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
659 Expected<sys::fs::TempFile> Temp =
660 sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
661 if (!Temp)
662 return Temp.takeError();
663 raw_fd_ostream Out(Temp->FD, false);
664
665 if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
666 Deterministic, Thin)) {
667 if (Error DiscardError = Temp->discard())
668 return joinErrors(std::move(E), std::move(DiscardError));
669 return E;
670 }
671
672 // At this point, we no longer need whatever backing memory
673 // was used to generate the NewMembers. On Windows, this buffer
674 // could be a mapped view of the file we want to replace (if
675 // we're updating an existing archive, say). In that case, the
676 // rename would still succeed, but it would leave behind a
677 // temporary file (actually the original file renamed) because
678 // a file cannot be deleted while there's a handle open on it,
679 // only renamed. So by freeing this buffer, this ensures that
680 // the last open handle on the destination file, if any, is
681 // closed before we attempt to rename.
682 OldArchiveBuf.reset();
683
684 return Temp->keep(ArcName);
685 }
686
687 Expected<std::unique_ptr<MemoryBuffer>>
writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers,bool WriteSymtab,object::Archive::Kind Kind,bool Deterministic,bool Thin)688 writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab,
689 object::Archive::Kind Kind, bool Deterministic,
690 bool Thin) {
691 SmallVector<char, 0> ArchiveBufferVector;
692 raw_svector_ostream ArchiveStream(ArchiveBufferVector);
693
694 if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
695 Kind, Deterministic, Thin))
696 return std::move(E);
697
698 return std::make_unique<SmallVectorMemoryBuffer>(
699 std::move(ArchiveBufferVector));
700 }
701
702 } // namespace llvm
703