1 //===- MachOObjcopy.cpp -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "MachOObjcopy.h"
10 #include "../CopyConfig.h"
11 #include "../llvm-objcopy.h"
12 #include "MachOReader.h"
13 #include "MachOWriter.h"
14 #include "llvm/ADT/DenseSet.h"
15 #include "llvm/Object/ArchiveWriter.h"
16 #include "llvm/Object/MachOUniversal.h"
17 #include "llvm/Object/MachOUniversalWriter.h"
18 #include "llvm/Support/Errc.h"
19 #include "llvm/Support/Error.h"
20 
21 namespace llvm {
22 namespace objcopy {
23 namespace macho {
24 
25 using namespace object;
26 using SectionPred = std::function<bool(const std::unique_ptr<Section> &Sec)>;
27 using LoadCommandPred = std::function<bool(const LoadCommand &LC)>;
28 
29 #ifndef NDEBUG
isLoadCommandWithPayloadString(const LoadCommand & LC)30 static bool isLoadCommandWithPayloadString(const LoadCommand &LC) {
31   // TODO: Add support for LC_REEXPORT_DYLIB, LC_LOAD_UPWARD_DYLIB and
32   // LC_LAZY_LOAD_DYLIB
33   return LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH ||
34          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_ID_DYLIB ||
35          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_DYLIB ||
36          LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_LOAD_WEAK_DYLIB;
37 }
38 #endif
39 
getPayloadString(const LoadCommand & LC)40 static StringRef getPayloadString(const LoadCommand &LC) {
41   assert(isLoadCommandWithPayloadString(LC) &&
42          "unsupported load command encountered");
43 
44   return StringRef(reinterpret_cast<const char *>(LC.Payload.data()),
45                    LC.Payload.size())
46       .rtrim('\0');
47 }
48 
removeSections(const CopyConfig & Config,Object & Obj)49 static Error removeSections(const CopyConfig &Config, Object &Obj) {
50   SectionPred RemovePred = [](const std::unique_ptr<Section> &) {
51     return false;
52   };
53 
54   if (!Config.ToRemove.empty()) {
55     RemovePred = [&Config, RemovePred](const std::unique_ptr<Section> &Sec) {
56       return Config.ToRemove.matches(Sec->CanonicalName);
57     };
58   }
59 
60   if (Config.StripAll || Config.StripDebug) {
61     // Remove all debug sections.
62     RemovePred = [RemovePred](const std::unique_ptr<Section> &Sec) {
63       if (Sec->Segname == "__DWARF")
64         return true;
65 
66       return RemovePred(Sec);
67     };
68   }
69 
70   if (!Config.OnlySection.empty()) {
71     // Overwrite RemovePred because --only-section takes priority.
72     RemovePred = [&Config](const std::unique_ptr<Section> &Sec) {
73       return !Config.OnlySection.matches(Sec->CanonicalName);
74     };
75   }
76 
77   return Obj.removeSections(RemovePred);
78 }
79 
markSymbols(const CopyConfig & Config,Object & Obj)80 static void markSymbols(const CopyConfig &Config, Object &Obj) {
81   // Symbols referenced from the indirect symbol table must not be removed.
82   for (IndirectSymbolEntry &ISE : Obj.IndirectSymTable.Symbols)
83     if (ISE.Symbol)
84       (*ISE.Symbol)->Referenced = true;
85 }
86 
updateAndRemoveSymbols(const CopyConfig & Config,Object & Obj)87 static void updateAndRemoveSymbols(const CopyConfig &Config, Object &Obj) {
88   for (SymbolEntry &Sym : Obj.SymTable) {
89     auto I = Config.SymbolsToRename.find(Sym.Name);
90     if (I != Config.SymbolsToRename.end())
91       Sym.Name = std::string(I->getValue());
92   }
93 
94   auto RemovePred = [Config, &Obj](const std::unique_ptr<SymbolEntry> &N) {
95     if (N->Referenced)
96       return false;
97     if (Config.StripAll)
98       return true;
99     if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
100       return true;
101     // This behavior is consistent with cctools' strip.
102     if (Config.StripSwiftSymbols && (Obj.Header.Flags & MachO::MH_DYLDLINK) &&
103         Obj.SwiftVersion && *Obj.SwiftVersion && N->isSwiftSymbol())
104       return true;
105     return false;
106   };
107 
108   Obj.SymTable.removeSymbols(RemovePred);
109 }
110 
111 template <typename LCType>
updateLoadCommandPayloadString(LoadCommand & LC,StringRef S)112 static void updateLoadCommandPayloadString(LoadCommand &LC, StringRef S) {
113   assert(isLoadCommandWithPayloadString(LC) &&
114          "unsupported load command encountered");
115 
116   uint32_t NewCmdsize = alignTo(sizeof(LCType) + S.size() + 1, 8);
117 
118   LC.MachOLoadCommand.load_command_data.cmdsize = NewCmdsize;
119   LC.Payload.assign(NewCmdsize - sizeof(LCType), 0);
120   std::copy(S.begin(), S.end(), LC.Payload.begin());
121 }
122 
buildRPathLoadCommand(StringRef Path)123 static LoadCommand buildRPathLoadCommand(StringRef Path) {
124   LoadCommand LC;
125   MachO::rpath_command RPathLC;
126   RPathLC.cmd = MachO::LC_RPATH;
127   RPathLC.path = sizeof(MachO::rpath_command);
128   RPathLC.cmdsize = alignTo(sizeof(MachO::rpath_command) + Path.size() + 1, 8);
129   LC.MachOLoadCommand.rpath_command_data = RPathLC;
130   LC.Payload.assign(RPathLC.cmdsize - sizeof(MachO::rpath_command), 0);
131   std::copy(Path.begin(), Path.end(), LC.Payload.begin());
132   return LC;
133 }
134 
processLoadCommands(const CopyConfig & Config,Object & Obj)135 static Error processLoadCommands(const CopyConfig &Config, Object &Obj) {
136   // Remove RPaths.
137   DenseSet<StringRef> RPathsToRemove(Config.RPathsToRemove.begin(),
138                                      Config.RPathsToRemove.end());
139 
140   LoadCommandPred RemovePred = [&RPathsToRemove,
141                                 &Config](const LoadCommand &LC) {
142     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH) {
143       // When removing all RPaths we don't need to care
144       // about what it contains
145       if (Config.RemoveAllRpaths)
146         return true;
147 
148       StringRef RPath = getPayloadString(LC);
149       if (RPathsToRemove.count(RPath)) {
150         RPathsToRemove.erase(RPath);
151         return true;
152       }
153     }
154     return false;
155   };
156 
157   if (Error E = Obj.removeLoadCommands(RemovePred))
158     return E;
159 
160   // Emit an error if the Mach-O binary does not contain an rpath path name
161   // specified in -delete_rpath.
162   for (StringRef RPath : Config.RPathsToRemove) {
163     if (RPathsToRemove.count(RPath))
164       return createStringError(errc::invalid_argument,
165                                "no LC_RPATH load command with path: %s",
166                                RPath.str().c_str());
167   }
168 
169   DenseSet<StringRef> RPaths;
170 
171   // Get all existing RPaths.
172   for (LoadCommand &LC : Obj.LoadCommands) {
173     if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_RPATH)
174       RPaths.insert(getPayloadString(LC));
175   }
176 
177   // Throw errors for invalid RPaths.
178   for (const auto &OldNew : Config.RPathsToUpdate) {
179     StringRef Old = OldNew.getFirst();
180     StringRef New = OldNew.getSecond();
181     if (RPaths.count(Old) == 0)
182       return createStringError(errc::invalid_argument,
183                                "no LC_RPATH load command with path: " + Old);
184     if (RPaths.count(New) != 0)
185       return createStringError(errc::invalid_argument,
186                                "rpath '" + New +
187                                    "' would create a duplicate load command");
188   }
189 
190   // Update load commands.
191   for (LoadCommand &LC : Obj.LoadCommands) {
192     switch (LC.MachOLoadCommand.load_command_data.cmd) {
193     case MachO::LC_ID_DYLIB:
194       if (Config.SharedLibId)
195         updateLoadCommandPayloadString<MachO::dylib_command>(
196             LC, *Config.SharedLibId);
197       break;
198 
199     case MachO::LC_RPATH: {
200       StringRef RPath = getPayloadString(LC);
201       StringRef NewRPath = Config.RPathsToUpdate.lookup(RPath);
202       if (!NewRPath.empty())
203         updateLoadCommandPayloadString<MachO::rpath_command>(LC, NewRPath);
204       break;
205     }
206 
207     // TODO: Add LC_REEXPORT_DYLIB, LC_LAZY_LOAD_DYLIB, and LC_LOAD_UPWARD_DYLIB
208     // here once llvm-objcopy supports them.
209     case MachO::LC_LOAD_DYLIB:
210     case MachO::LC_LOAD_WEAK_DYLIB:
211       StringRef InstallName = getPayloadString(LC);
212       StringRef NewInstallName =
213           Config.InstallNamesToUpdate.lookup(InstallName);
214       if (!NewInstallName.empty())
215         updateLoadCommandPayloadString<MachO::dylib_command>(LC,
216                                                              NewInstallName);
217       break;
218     }
219   }
220 
221   // Add new RPaths.
222   for (StringRef RPath : Config.RPathToAdd) {
223     if (RPaths.count(RPath) != 0)
224       return createStringError(errc::invalid_argument,
225                                "rpath '" + RPath +
226                                    "' would create a duplicate load command");
227     RPaths.insert(RPath);
228     Obj.LoadCommands.push_back(buildRPathLoadCommand(RPath));
229   }
230 
231   for (StringRef RPath : Config.RPathToPrepend) {
232     if (RPaths.count(RPath) != 0)
233       return createStringError(errc::invalid_argument,
234                                "rpath '" + RPath +
235                                    "' would create a duplicate load command");
236 
237     RPaths.insert(RPath);
238     Obj.LoadCommands.insert(Obj.LoadCommands.begin(),
239                             buildRPathLoadCommand(RPath));
240   }
241 
242   // Unlike appending rpaths, the indexes of subsequent load commands must
243   // be recalculated after prepending one.
244   if (!Config.RPathToPrepend.empty())
245     Obj.updateLoadCommandIndexes();
246 
247   return Error::success();
248 }
249 
dumpSectionToFile(StringRef SecName,StringRef Filename,Object & Obj)250 static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
251                                Object &Obj) {
252   for (LoadCommand &LC : Obj.LoadCommands)
253     for (const std::unique_ptr<Section> &Sec : LC.Sections) {
254       if (Sec->CanonicalName == SecName) {
255         Expected<std::unique_ptr<FileOutputBuffer>> BufferOrErr =
256             FileOutputBuffer::create(Filename, Sec->Content.size());
257         if (!BufferOrErr)
258           return BufferOrErr.takeError();
259         std::unique_ptr<FileOutputBuffer> Buf = std::move(*BufferOrErr);
260         llvm::copy(Sec->Content, Buf->getBufferStart());
261 
262         if (Error E = Buf->commit())
263           return E;
264         return Error::success();
265       }
266     }
267 
268   return createStringError(object_error::parse_failed, "section '%s' not found",
269                            SecName.str().c_str());
270 }
271 
addSection(StringRef SecName,StringRef Filename,Object & Obj)272 static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
273   ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
274       MemoryBuffer::getFile(Filename);
275   if (!BufOrErr)
276     return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
277   std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
278 
279   std::pair<StringRef, StringRef> Pair = SecName.split(',');
280   StringRef TargetSegName = Pair.first;
281   Section Sec(TargetSegName, Pair.second);
282   Sec.Content = Obj.NewSectionsContents.save(Buf->getBuffer());
283   Sec.Size = Sec.Content.size();
284 
285   // Add the a section into an existing segment.
286   for (LoadCommand &LC : Obj.LoadCommands) {
287     Optional<StringRef> SegName = LC.getSegmentName();
288     if (SegName && SegName == TargetSegName) {
289       uint64_t Addr = *LC.getSegmentVMAddr();
290       for (const std::unique_ptr<Section> &S : LC.Sections)
291         Addr = std::max(Addr, S->Addr + S->Size);
292       LC.Sections.push_back(std::make_unique<Section>(Sec));
293       LC.Sections.back()->Addr = Addr;
294       return Error::success();
295     }
296   }
297 
298   // There's no segment named TargetSegName. Create a new load command and
299   // Insert a new section into it.
300   LoadCommand &NewSegment =
301       Obj.addSegment(TargetSegName, alignTo(Sec.Size, 16384));
302   NewSegment.Sections.push_back(std::make_unique<Section>(Sec));
303   NewSegment.Sections.back()->Addr = *NewSegment.getSegmentVMAddr();
304   return Error::success();
305 }
306 
307 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
308 // ("<segment>,<section>") and lengths of both segment and section names are
309 // valid.
isValidMachOCannonicalName(StringRef Name)310 static Error isValidMachOCannonicalName(StringRef Name) {
311   if (Name.count(',') != 1)
312     return createStringError(errc::invalid_argument,
313                              "invalid section name '%s' (should be formatted "
314                              "as '<segment name>,<section name>')",
315                              Name.str().c_str());
316 
317   std::pair<StringRef, StringRef> Pair = Name.split(',');
318   if (Pair.first.size() > 16)
319     return createStringError(errc::invalid_argument,
320                              "too long segment name: '%s'",
321                              Pair.first.str().c_str());
322   if (Pair.second.size() > 16)
323     return createStringError(errc::invalid_argument,
324                              "too long section name: '%s'",
325                              Pair.second.str().c_str());
326   return Error::success();
327 }
328 
handleArgs(const CopyConfig & Config,Object & Obj)329 static Error handleArgs(const CopyConfig &Config, Object &Obj) {
330   if (Config.AllowBrokenLinks || !Config.BuildIdLinkDir.empty() ||
331       Config.BuildIdLinkInput || Config.BuildIdLinkOutput ||
332       !Config.SplitDWO.empty() || !Config.SymbolsPrefix.empty() ||
333       !Config.AllocSectionsPrefix.empty() || !Config.KeepSection.empty() ||
334       Config.NewSymbolVisibility || !Config.SymbolsToGlobalize.empty() ||
335       !Config.SymbolsToKeep.empty() || !Config.SymbolsToLocalize.empty() ||
336       !Config.SymbolsToWeaken.empty() || !Config.SymbolsToKeepGlobal.empty() ||
337       !Config.SectionsToRename.empty() ||
338       !Config.UnneededSymbolsToRemove.empty() ||
339       !Config.SetSectionAlignment.empty() || !Config.SetSectionFlags.empty() ||
340       Config.ExtractDWO || Config.LocalizeHidden || Config.PreserveDates ||
341       Config.StripAllGNU || Config.StripDWO || Config.StripNonAlloc ||
342       Config.StripSections || Config.Weaken || Config.DecompressDebugSections ||
343       Config.StripUnneeded || Config.DiscardMode == DiscardType::Locals ||
344       !Config.SymbolsToAdd.empty() || Config.EntryExpr) {
345     return createStringError(llvm::errc::invalid_argument,
346                              "option not supported by llvm-objcopy for MachO");
347   }
348 
349   // Dump sections before add/remove for compatibility with GNU objcopy.
350   for (StringRef Flag : Config.DumpSection) {
351     StringRef SectionName;
352     StringRef FileName;
353     std::tie(SectionName, FileName) = Flag.split('=');
354     if (Error E = dumpSectionToFile(SectionName, FileName, Obj))
355       return E;
356   }
357 
358   if (Error E = removeSections(Config, Obj))
359     return E;
360 
361   // Mark symbols to determine which symbols are still needed.
362   if (Config.StripAll)
363     markSymbols(Config, Obj);
364 
365   updateAndRemoveSymbols(Config, Obj);
366 
367   if (Config.StripAll)
368     for (LoadCommand &LC : Obj.LoadCommands)
369       for (std::unique_ptr<Section> &Sec : LC.Sections)
370         Sec->Relocations.clear();
371 
372   for (const auto &Flag : Config.AddSection) {
373     std::pair<StringRef, StringRef> SecPair = Flag.split("=");
374     StringRef SecName = SecPair.first;
375     StringRef File = SecPair.second;
376     if (Error E = isValidMachOCannonicalName(SecName))
377       return E;
378     if (Error E = addSection(SecName, File, Obj))
379       return E;
380   }
381 
382   if (Error E = processLoadCommands(Config, Obj))
383     return E;
384 
385   return Error::success();
386 }
387 
executeObjcopyOnBinary(const CopyConfig & Config,object::MachOObjectFile & In,Buffer & Out)388 Error executeObjcopyOnBinary(const CopyConfig &Config,
389                              object::MachOObjectFile &In, Buffer &Out) {
390   MachOReader Reader(In);
391   Expected<std::unique_ptr<Object>> O = Reader.create();
392   if (!O)
393     return createFileError(Config.InputFilename, O.takeError());
394 
395   if (Error E = handleArgs(Config, **O))
396     return createFileError(Config.InputFilename, std::move(E));
397 
398   // Page size used for alignment of segment sizes in Mach-O executables and
399   // dynamic libraries.
400   uint64_t PageSize;
401   switch (In.getArch()) {
402   case Triple::ArchType::arm:
403   case Triple::ArchType::aarch64:
404   case Triple::ArchType::aarch64_32:
405     PageSize = 16384;
406     break;
407   default:
408     PageSize = 4096;
409   }
410 
411   MachOWriter Writer(**O, In.is64Bit(), In.isLittleEndian(), PageSize, Out);
412   if (auto E = Writer.finalize())
413     return E;
414   return Writer.write();
415 }
416 
executeObjcopyOnMachOUniversalBinary(CopyConfig & Config,const MachOUniversalBinary & In,Buffer & Out)417 Error executeObjcopyOnMachOUniversalBinary(CopyConfig &Config,
418                                            const MachOUniversalBinary &In,
419                                            Buffer &Out) {
420   SmallVector<OwningBinary<Binary>, 2> Binaries;
421   SmallVector<Slice, 2> Slices;
422   for (const auto &O : In.objects()) {
423     Expected<std::unique_ptr<Archive>> ArOrErr = O.getAsArchive();
424     if (ArOrErr) {
425       Expected<std::vector<NewArchiveMember>> NewArchiveMembersOrErr =
426           createNewArchiveMembers(Config, **ArOrErr);
427       if (!NewArchiveMembersOrErr)
428         return NewArchiveMembersOrErr.takeError();
429       Expected<std::unique_ptr<MemoryBuffer>> OutputBufferOrErr =
430           writeArchiveToBuffer(*NewArchiveMembersOrErr,
431                                (*ArOrErr)->hasSymbolTable(), (*ArOrErr)->kind(),
432                                Config.DeterministicArchives,
433                                (*ArOrErr)->isThin());
434       if (!OutputBufferOrErr)
435         return OutputBufferOrErr.takeError();
436       Expected<std::unique_ptr<Binary>> BinaryOrErr =
437           object::createBinary(**OutputBufferOrErr);
438       if (!BinaryOrErr)
439         return BinaryOrErr.takeError();
440       Binaries.emplace_back(std::move(*BinaryOrErr),
441                             std::move(*OutputBufferOrErr));
442       Slices.emplace_back(*cast<Archive>(Binaries.back().getBinary()),
443                           O.getCPUType(), O.getCPUSubType(),
444                           O.getArchFlagName(), O.getAlign());
445       continue;
446     }
447     // The methods getAsArchive, getAsObjectFile, getAsIRObject of the class
448     // ObjectForArch return an Error in case of the type mismatch. We need to
449     // check each in turn to see what kind of slice this is, so ignore errors
450     // produced along the way.
451     consumeError(ArOrErr.takeError());
452 
453     Expected<std::unique_ptr<MachOObjectFile>> ObjOrErr = O.getAsObjectFile();
454     if (!ObjOrErr) {
455       consumeError(ObjOrErr.takeError());
456       return createStringError(std::errc::invalid_argument,
457                                "slice for '%s' of the universal Mach-O binary "
458                                "'%s' is not a Mach-O object or an archive",
459                                O.getArchFlagName().c_str(),
460                                Config.InputFilename.str().c_str());
461     }
462     std::string ArchFlagName = O.getArchFlagName();
463     MemBuffer MB(ArchFlagName);
464     if (Error E = executeObjcopyOnBinary(Config, **ObjOrErr, MB))
465       return E;
466     std::unique_ptr<WritableMemoryBuffer> OutputBuffer =
467         MB.releaseMemoryBuffer();
468     Expected<std::unique_ptr<Binary>> BinaryOrErr =
469         object::createBinary(*OutputBuffer);
470     if (!BinaryOrErr)
471       return BinaryOrErr.takeError();
472     Binaries.emplace_back(std::move(*BinaryOrErr), std::move(OutputBuffer));
473     Slices.emplace_back(*cast<MachOObjectFile>(Binaries.back().getBinary()),
474                         O.getAlign());
475   }
476   Expected<std::unique_ptr<MemoryBuffer>> B =
477       writeUniversalBinaryToBuffer(Slices);
478   if (!B)
479     return B.takeError();
480   if (Error E = Out.allocate((*B)->getBufferSize()))
481     return E;
482   memcpy(Out.getBufferStart(), (*B)->getBufferStart(), (*B)->getBufferSize());
483   return Out.commit();
484 }
485 
486 } // end namespace macho
487 } // end namespace objcopy
488 } // end namespace llvm
489