1 //===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "llvm/Object/MachOObject.h"
11 #include "llvm/ADT/StringRef.h"
12 #include "llvm/ADT/SmallVector.h"
13 #include "llvm/Support/MemoryBuffer.h"
14 #include "llvm/Support/Host.h"
15 #include "llvm/Support/SwapByteOrder.h"
16 #include "llvm/Support/raw_ostream.h"
17 #include "llvm/Support/Debug.h"
18 
19 using namespace llvm;
20 using namespace llvm::object;
21 
22 /* Translation Utilities */
23 
24 template<typename T>
SwapValue(T & Value)25 static void SwapValue(T &Value) {
26   Value = sys::SwapByteOrder(Value);
27 }
28 
/// Byte-swap the fields of \p Value in place.
///
/// Only the declaration lives here; every struct type that is read through
/// ReadInMemoryStruct provides its own explicit specialization in this file.
template <typename T>
static void SwapStruct(T &Value);
31 
32 template<typename T>
ReadInMemoryStruct(const MachOObject & MOO,StringRef Buffer,uint64_t Base,InMemoryStruct<T> & Res)33 static void ReadInMemoryStruct(const MachOObject &MOO,
34                                StringRef Buffer, uint64_t Base,
35                                InMemoryStruct<T> &Res) {
36   typedef T struct_type;
37   uint64_t Size = sizeof(struct_type);
38 
39   // Check that the buffer contains the expected data.
40   if (Base + Size >  Buffer.size()) {
41     Res = 0;
42     return;
43   }
44 
45   // Check whether we can return a direct pointer.
46   struct_type *Ptr = (struct_type *) (Buffer.data() + Base);
47   if (!MOO.isSwappedEndian()) {
48     Res = Ptr;
49     return;
50   }
51 
52   // Otherwise, copy the struct and translate the values.
53   Res = *Ptr;
54   SwapStruct(*Res);
55 }
56 
57 /* *** */
58 
MachOObject(MemoryBuffer * Buffer_,bool IsLittleEndian_,bool Is64Bit_)59 MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_,
60                          bool Is64Bit_)
61   : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_),
62     IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()),
63     HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) {
64   // Load the common header.
65   memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header));
66   if (IsSwappedEndian) {
67     SwapValue(Header.Magic);
68     SwapValue(Header.CPUType);
69     SwapValue(Header.CPUSubtype);
70     SwapValue(Header.FileType);
71     SwapValue(Header.NumLoadCommands);
72     SwapValue(Header.SizeOfLoadCommands);
73     SwapValue(Header.Flags);
74   }
75 
76   if (is64Bit()) {
77     memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header),
78            sizeof(Header64Ext));
79     if (IsSwappedEndian) {
80       SwapValue(Header64Ext.Reserved);
81     }
82   }
83 
84   // Create the load command array if sane.
85   if (getHeader().NumLoadCommands < (1 << 20))
86     LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands];
87 }
88 
~MachOObject()89 MachOObject::~MachOObject() {
90   delete [] LoadCommands;
91 }
92 
LoadFromBuffer(MemoryBuffer * Buffer,std::string * ErrorStr)93 MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer,
94                                          std::string *ErrorStr) {
95   // First, check the magic value and initialize the basic object info.
96   bool IsLittleEndian = false, Is64Bit = false;
97   StringRef Magic = Buffer->getBuffer().slice(0, 4);
98   if (Magic == "\xFE\xED\xFA\xCE") {
99   }  else if (Magic == "\xCE\xFA\xED\xFE") {
100     IsLittleEndian = true;
101   } else if (Magic == "\xFE\xED\xFA\xCF") {
102     Is64Bit = true;
103   } else if (Magic == "\xCF\xFA\xED\xFE") {
104     IsLittleEndian = true;
105     Is64Bit = true;
106   } else {
107     if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)";
108     return 0;
109   }
110 
111   // Ensure that the at least the full header is present.
112   unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size;
113   if (Buffer->getBufferSize() < HeaderSize) {
114     if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)";
115     return 0;
116   }
117 
118   OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian,
119                                                 Is64Bit));
120 
121   // Check for bogus number of load commands.
122   if (Object->getHeader().NumLoadCommands >= (1 << 20)) {
123     if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)";
124     return 0;
125   }
126 
127   if (ErrorStr) *ErrorStr = "";
128   return Object.take();
129 }
130 
getData(size_t Offset,size_t Size) const131 StringRef MachOObject::getData(size_t Offset, size_t Size) const {
132   return Buffer->getBuffer().substr(Offset,Size);
133 }
134 
RegisterStringTable(macho::SymtabLoadCommand & SLC)135 void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) {
136   HasStringTable = true;
137   StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset,
138                                            SLC.StringTableSize);
139 }
140 
141 const MachOObject::LoadCommandInfo &
getLoadCommandInfo(unsigned Index) const142 MachOObject::getLoadCommandInfo(unsigned Index) const {
143   assert(Index < getHeader().NumLoadCommands && "Invalid index!");
144 
145   // Load the command, if necessary.
146   if (Index >= NumLoadedCommands) {
147     uint64_t Offset;
148     if (Index == 0) {
149       Offset = getHeaderSize();
150     } else {
151       const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1);
152       Offset = Prev.Offset + Prev.Command.Size;
153     }
154 
155     LoadCommandInfo &Info = LoadCommands[Index];
156     memcpy(&Info.Command, Buffer->getBuffer().data() + Offset,
157            sizeof(macho::LoadCommand));
158     if (IsSwappedEndian) {
159       SwapValue(Info.Command.Type);
160       SwapValue(Info.Command.Size);
161     }
162     Info.Offset = Offset;
163     NumLoadedCommands = Index + 1;
164   }
165 
166   return LoadCommands[Index];
167 }
168 
169 template<>
SwapStruct(macho::SegmentLoadCommand & Value)170 void SwapStruct(macho::SegmentLoadCommand &Value) {
171   SwapValue(Value.Type);
172   SwapValue(Value.Size);
173   SwapValue(Value.VMAddress);
174   SwapValue(Value.VMSize);
175   SwapValue(Value.FileOffset);
176   SwapValue(Value.FileSize);
177   SwapValue(Value.MaxVMProtection);
178   SwapValue(Value.InitialVMProtection);
179   SwapValue(Value.NumSections);
180   SwapValue(Value.Flags);
181 }
ReadSegmentLoadCommand(const LoadCommandInfo & LCI,InMemoryStruct<macho::SegmentLoadCommand> & Res) const182 void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI,
183                          InMemoryStruct<macho::SegmentLoadCommand> &Res) const {
184   ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
185 }
186 
187 template<>
SwapStruct(macho::Segment64LoadCommand & Value)188 void SwapStruct(macho::Segment64LoadCommand &Value) {
189   SwapValue(Value.Type);
190   SwapValue(Value.Size);
191   SwapValue(Value.VMAddress);
192   SwapValue(Value.VMSize);
193   SwapValue(Value.FileOffset);
194   SwapValue(Value.FileSize);
195   SwapValue(Value.MaxVMProtection);
196   SwapValue(Value.InitialVMProtection);
197   SwapValue(Value.NumSections);
198   SwapValue(Value.Flags);
199 }
ReadSegment64LoadCommand(const LoadCommandInfo & LCI,InMemoryStruct<macho::Segment64LoadCommand> & Res) const200 void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI,
201                        InMemoryStruct<macho::Segment64LoadCommand> &Res) const {
202   ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
203 }
204 
205 template<>
SwapStruct(macho::SymtabLoadCommand & Value)206 void SwapStruct(macho::SymtabLoadCommand &Value) {
207   SwapValue(Value.Type);
208   SwapValue(Value.Size);
209   SwapValue(Value.SymbolTableOffset);
210   SwapValue(Value.NumSymbolTableEntries);
211   SwapValue(Value.StringTableOffset);
212   SwapValue(Value.StringTableSize);
213 }
ReadSymtabLoadCommand(const LoadCommandInfo & LCI,InMemoryStruct<macho::SymtabLoadCommand> & Res) const214 void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI,
215                           InMemoryStruct<macho::SymtabLoadCommand> &Res) const {
216   ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
217 }
218 
219 template<>
SwapStruct(macho::DysymtabLoadCommand & Value)220 void SwapStruct(macho::DysymtabLoadCommand &Value) {
221   SwapValue(Value.Type);
222   SwapValue(Value.Size);
223   SwapValue(Value.LocalSymbolsIndex);
224   SwapValue(Value.NumLocalSymbols);
225   SwapValue(Value.ExternalSymbolsIndex);
226   SwapValue(Value.NumExternalSymbols);
227   SwapValue(Value.UndefinedSymbolsIndex);
228   SwapValue(Value.NumUndefinedSymbols);
229   SwapValue(Value.TOCOffset);
230   SwapValue(Value.NumTOCEntries);
231   SwapValue(Value.ModuleTableOffset);
232   SwapValue(Value.NumModuleTableEntries);
233   SwapValue(Value.ReferenceSymbolTableOffset);
234   SwapValue(Value.NumReferencedSymbolTableEntries);
235   SwapValue(Value.IndirectSymbolTableOffset);
236   SwapValue(Value.NumIndirectSymbolTableEntries);
237   SwapValue(Value.ExternalRelocationTableOffset);
238   SwapValue(Value.NumExternalRelocationTableEntries);
239   SwapValue(Value.LocalRelocationTableOffset);
240   SwapValue(Value.NumLocalRelocationTableEntries);
241 }
ReadDysymtabLoadCommand(const LoadCommandInfo & LCI,InMemoryStruct<macho::DysymtabLoadCommand> & Res) const242 void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI,
243                         InMemoryStruct<macho::DysymtabLoadCommand> &Res) const {
244   ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
245 }
246 
247 template<>
SwapStruct(macho::LinkeditDataLoadCommand & Value)248 void SwapStruct(macho::LinkeditDataLoadCommand &Value) {
249   SwapValue(Value.Type);
250   SwapValue(Value.Size);
251   SwapValue(Value.DataOffset);
252   SwapValue(Value.DataSize);
253 }
ReadLinkeditDataLoadCommand(const LoadCommandInfo & LCI,InMemoryStruct<macho::LinkeditDataLoadCommand> & Res) const254 void MachOObject::ReadLinkeditDataLoadCommand(const LoadCommandInfo &LCI,
255                     InMemoryStruct<macho::LinkeditDataLoadCommand> &Res) const {
256   ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res);
257 }
258 
259 template<>
SwapStruct(macho::IndirectSymbolTableEntry & Value)260 void SwapStruct(macho::IndirectSymbolTableEntry &Value) {
261   SwapValue(Value.Index);
262 }
263 void
ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand & DLC,unsigned Index,InMemoryStruct<macho::IndirectSymbolTableEntry> & Res) const264 MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC,
265                                           unsigned Index,
266                    InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const {
267   uint64_t Offset = (DLC.IndirectSymbolTableOffset +
268                      Index * sizeof(macho::IndirectSymbolTableEntry));
269   ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
270 }
271 
272 
273 template<>
SwapStruct(macho::Section & Value)274 void SwapStruct(macho::Section &Value) {
275   SwapValue(Value.Address);
276   SwapValue(Value.Size);
277   SwapValue(Value.Offset);
278   SwapValue(Value.Align);
279   SwapValue(Value.RelocationTableOffset);
280   SwapValue(Value.NumRelocationTableEntries);
281   SwapValue(Value.Flags);
282   SwapValue(Value.Reserved1);
283   SwapValue(Value.Reserved2);
284 }
ReadSection(const LoadCommandInfo & LCI,unsigned Index,InMemoryStruct<macho::Section> & Res) const285 void MachOObject::ReadSection(const LoadCommandInfo &LCI,
286                               unsigned Index,
287                               InMemoryStruct<macho::Section> &Res) const {
288   assert(LCI.Command.Type == macho::LCT_Segment &&
289          "Unexpected load command info!");
290   uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) +
291                      Index * sizeof(macho::Section));
292   ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
293 }
294 
295 template<>
SwapStruct(macho::Section64 & Value)296 void SwapStruct(macho::Section64 &Value) {
297   SwapValue(Value.Address);
298   SwapValue(Value.Size);
299   SwapValue(Value.Offset);
300   SwapValue(Value.Align);
301   SwapValue(Value.RelocationTableOffset);
302   SwapValue(Value.NumRelocationTableEntries);
303   SwapValue(Value.Flags);
304   SwapValue(Value.Reserved1);
305   SwapValue(Value.Reserved2);
306   SwapValue(Value.Reserved3);
307 }
ReadSection64(const LoadCommandInfo & LCI,unsigned Index,InMemoryStruct<macho::Section64> & Res) const308 void MachOObject::ReadSection64(const LoadCommandInfo &LCI,
309                                 unsigned Index,
310                                 InMemoryStruct<macho::Section64> &Res) const {
311   assert(LCI.Command.Type == macho::LCT_Segment64 &&
312          "Unexpected load command info!");
313   uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) +
314                      Index * sizeof(macho::Section64));
315   ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
316 }
317 
318 template<>
SwapStruct(macho::RelocationEntry & Value)319 void SwapStruct(macho::RelocationEntry &Value) {
320   SwapValue(Value.Word0);
321   SwapValue(Value.Word1);
322 }
ReadRelocationEntry(uint64_t RelocationTableOffset,unsigned Index,InMemoryStruct<macho::RelocationEntry> & Res) const323 void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset,
324                                       unsigned Index,
325                             InMemoryStruct<macho::RelocationEntry> &Res) const {
326   uint64_t Offset = (RelocationTableOffset +
327                      Index * sizeof(macho::RelocationEntry));
328   ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
329 }
330 
331 template<>
SwapStruct(macho::SymbolTableEntry & Value)332 void SwapStruct(macho::SymbolTableEntry &Value) {
333   SwapValue(Value.StringIndex);
334   SwapValue(Value.Flags);
335   SwapValue(Value.Value);
336 }
ReadSymbolTableEntry(uint64_t SymbolTableOffset,unsigned Index,InMemoryStruct<macho::SymbolTableEntry> & Res) const337 void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset,
338                                        unsigned Index,
339                            InMemoryStruct<macho::SymbolTableEntry> &Res) const {
340   uint64_t Offset = (SymbolTableOffset +
341                      Index * sizeof(macho::SymbolTableEntry));
342   ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
343 }
344 
345 template<>
SwapStruct(macho::Symbol64TableEntry & Value)346 void SwapStruct(macho::Symbol64TableEntry &Value) {
347   SwapValue(Value.StringIndex);
348   SwapValue(Value.Flags);
349   SwapValue(Value.Value);
350 }
ReadSymbol64TableEntry(uint64_t SymbolTableOffset,unsigned Index,InMemoryStruct<macho::Symbol64TableEntry> & Res) const351 void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset,
352                                        unsigned Index,
353                          InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
354   uint64_t Offset = (SymbolTableOffset +
355                      Index * sizeof(macho::Symbol64TableEntry));
356   ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res);
357 }
358 
359 
ReadULEB128s(uint64_t Index,SmallVectorImpl<uint64_t> & Out) const360 void MachOObject::ReadULEB128s(uint64_t Index,
361                                SmallVectorImpl<uint64_t> &Out) const {
362   const char *ptr = Buffer->getBufferStart() + Index;
363   uint64_t data = 0;
364   uint64_t delta = 0;
365   uint32_t shift = 0;
366   while (true) {
367     assert(ptr < Buffer->getBufferEnd() && "index out of bounds");
368     assert(shift < 64 && "too big for uint64_t");
369 
370     uint8_t byte = *ptr++;
371     delta |= ((byte & 0x7F) << shift);
372     shift += 7;
373     if (byte < 0x80) {
374       if (delta == 0)
375         break;
376       data += delta;
377       Out.push_back(data);
378       delta = 0;
379       shift = 0;
380     }
381   }
382 }
383 
384 /* ** */
385 // Object Dumping Facilities
dump() const386 void MachOObject::dump() const { print(dbgs()); dbgs() << '\n'; }
dumpHeader() const387 void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; }
388 
printHeader(raw_ostream & O) const389 void MachOObject::printHeader(raw_ostream &O) const {
390   O << "('cputype', " << Header.CPUType << ")\n";
391   O << "('cpusubtype', " << Header.CPUSubtype << ")\n";
392   O << "('filetype', " << Header.FileType << ")\n";
393   O << "('num_load_commands', " << Header.NumLoadCommands << ")\n";
394   O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n";
395   O << "('flag', " << Header.Flags << ")\n";
396 
397   // Print extended header if 64-bit.
398   if (is64Bit())
399     O << "('reserved', " << Header64Ext.Reserved << ")\n";
400 }
401 
print(raw_ostream & O) const402 void MachOObject::print(raw_ostream &O) const {
403   O << "Header:\n";
404   printHeader(O);
405   O << "Load Commands:\n";
406 
407   O << "Buffer:\n";
408 }
409