1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // macho_id.cc: Functions to gather identifying information from a macho file
31 //
32 // See macho_id.h for documentation
33 //
34 // Author: Dan Waylonis
35 
36 extern "C" {  // necessary for Leopard
37   #include <fcntl.h>
38   #include <mach-o/loader.h>
39   #include <mach-o/swap.h>
40   #include <stdio.h>
41   #include <stdlib.h>
42   #include <string.h>
43   #include <sys/time.h>
44   #include <sys/types.h>
45   #include <unistd.h>
46 }
47 
48 #include "common/mac/macho_id.h"
49 #include "common/mac/macho_walker.h"
50 #include "common/mac/macho_utilities.h"
51 
52 namespace MacFileUtilities {
53 
54 using google_breakpad::MD5Init;
55 using google_breakpad::MD5Update;
56 using google_breakpad::MD5Final;
57 
MachoID(const char * path)58 MachoID::MachoID(const char *path)
59    : memory_(0),
60      memory_size_(0),
61      crc_(0),
62      md5_context_(),
63      update_function_(NULL) {
64   strlcpy(path_, path, sizeof(path_));
65 }
66 
MachoID(const char * path,void * memory,size_t size)67 MachoID::MachoID(const char *path, void *memory, size_t size)
68    : memory_(memory),
69      memory_size_(size),
70      crc_(0),
71      md5_context_(),
72      update_function_(NULL) {
73   strlcpy(path_, path, sizeof(path_));
74 }
75 
~MachoID()76 MachoID::~MachoID() {
77 }
78 
79 // The CRC info is from http://en.wikipedia.org/wiki/Adler-32
80 // With optimizations from http://www.zlib.net/
81 
82 // The largest prime smaller than 65536
83 #define MOD_ADLER 65521
// MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MOD_ADLER-1) <= 2^32-1
85 #define MAX_BLOCK 5552
86 
UpdateCRC(unsigned char * bytes,size_t size)87 void MachoID::UpdateCRC(unsigned char *bytes, size_t size) {
88 // Unrolled loops for summing
89 #define DO1(buf,i)  {sum1 += (buf)[i]; sum2 += sum1;}
90 #define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
91 #define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
92 #define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
93 #define DO16(buf)   DO8(buf,0); DO8(buf,8);
94   // Split up the crc
95   uint32_t sum1 = crc_ & 0xFFFF;
96   uint32_t sum2 = (crc_ >> 16) & 0xFFFF;
97 
98   // Do large blocks
99   while (size >= MAX_BLOCK) {
100     size -= MAX_BLOCK;
101     int block_count = MAX_BLOCK / 16;
102     do {
103       DO16(bytes);
104       bytes += 16;
105     } while (--block_count);
106     sum1 %= MOD_ADLER;
107     sum2 %= MOD_ADLER;
108   }
109 
110   // Do remaining bytes
111   if (size) {
112     while (size >= 16) {
113       size -= 16;
114       DO16(bytes);
115       bytes += 16;
116     }
117     while (size--) {
118       sum1 += *bytes++;
119       sum2 += sum1;
120     }
121     sum1 %= MOD_ADLER;
122     sum2 %= MOD_ADLER;
123     crc_ = (sum2 << 16) | sum1;
124   }
125 }
126 
UpdateMD5(unsigned char * bytes,size_t size)127 void MachoID::UpdateMD5(unsigned char *bytes, size_t size) {
128   MD5Update(&md5_context_, bytes, static_cast<unsigned>(size));
129 }
130 
Update(MachoWalker * walker,off_t offset,size_t size)131 void MachoID::Update(MachoWalker *walker, off_t offset, size_t size) {
132   if (!update_function_ || !size)
133     return;
134 
135   // Read up to 4k bytes at a time
136   unsigned char buffer[4096];
137   size_t buffer_size;
138   off_t file_offset = offset;
139   while (size > 0) {
140     if (size > sizeof(buffer)) {
141       buffer_size = sizeof(buffer);
142       size -= buffer_size;
143     } else {
144       buffer_size = size;
145       size = 0;
146     }
147 
148     if (!walker->ReadBytes(buffer, buffer_size, file_offset))
149       return;
150 
151     (this->*update_function_)(buffer, buffer_size);
152     file_offset += buffer_size;
153   }
154 }
155 
UUIDCommand(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char bytes[16])156 bool MachoID::UUIDCommand(cpu_type_t cpu_type,
157                           cpu_subtype_t cpu_subtype,
158                           unsigned char bytes[16]) {
159   struct breakpad_uuid_command uuid_cmd;
160   uuid_cmd.cmd = 0;
161   if (!WalkHeader(cpu_type, cpu_subtype, UUIDWalkerCB, &uuid_cmd))
162     return false;
163 
164   // If we found the command, we'll have initialized the uuid_command
165   // structure
166   if (uuid_cmd.cmd == LC_UUID) {
167     memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid));
168     return true;
169   }
170 
171   return false;
172 }
173 
IDCommand(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char identifier[16])174 bool MachoID::IDCommand(cpu_type_t cpu_type,
175                         cpu_subtype_t cpu_subtype,
176                         unsigned char identifier[16]) {
177   struct dylib_command dylib_cmd;
178   dylib_cmd.cmd = 0;
179   if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd))
180     return false;
181 
182   // If we found the command, we'll have initialized the dylib_command
183   // structure
184   if (dylib_cmd.cmd == LC_ID_DYLIB) {
185     // Take the hashed filename, version, and compatability version bytes
186     // to form the first 12 bytes, pad the rest with zeros
187 
188     // create a crude hash of the filename to generate the first 4 bytes
189     identifier[0] = 0;
190     identifier[1] = 0;
191     identifier[2] = 0;
192     identifier[3] = 0;
193 
194     for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) {
195       identifier[j%4] += path_[i];
196     }
197 
198     identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF;
199     identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF;
200     identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF;
201     identifier[7] = dylib_cmd.dylib.current_version & 0xFF;
202     identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF;
203     identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF;
204     identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF;
205     identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF;
206     identifier[12] = (cpu_type >> 24) & 0xFF;
207     identifier[13] = (cpu_type >> 16) & 0xFF;
208     identifier[14] = (cpu_type >> 8) & 0xFF;
209     identifier[15] = cpu_type & 0xFF;
210 
211     return true;
212   }
213 
214   return false;
215 }
216 
Adler32(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype)217 uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
218   update_function_ = &MachoID::UpdateCRC;
219   crc_ = 0;
220 
221   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
222     return 0;
223 
224   return crc_;
225 }
226 
MD5(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char identifier[16])227 bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) {
228   update_function_ = &MachoID::UpdateMD5;
229 
230   MD5Init(&md5_context_);
231 
232   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
233     return false;
234 
235   MD5Final(identifier, &md5_context_);
236   return true;
237 }
238 
WalkHeader(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,MachoWalker::LoadCommandCallback callback,void * context)239 bool MachoID::WalkHeader(cpu_type_t cpu_type,
240                          cpu_subtype_t cpu_subtype,
241                          MachoWalker::LoadCommandCallback callback,
242                          void *context) {
243   if (memory_) {
244     MachoWalker walker(memory_, memory_size_, callback, context);
245     return walker.WalkHeader(cpu_type, cpu_subtype);
246   } else {
247     MachoWalker walker(path_, callback, context);
248     return walker.WalkHeader(cpu_type, cpu_subtype);
249   }
250 }
251 
252 // static
WalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)253 bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
254                        bool swap, void *context) {
255   MachoID *macho_id = (MachoID *)context;
256 
257   if (cmd->cmd == LC_SEGMENT) {
258     struct segment_command seg;
259 
260     if (!walker->ReadBytes(&seg, sizeof(seg), offset))
261       return false;
262 
263     if (swap)
264       swap_segment_command(&seg, NXHostByteOrder());
265 
266     struct mach_header_64 header;
267     off_t header_offset;
268 
269     if (!walker->CurrentHeader(&header, &header_offset))
270       return false;
271 
272     // Process segments that have sections:
273     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
274     offset += sizeof(struct segment_command);
275     struct section sec;
276     for (unsigned long i = 0; i < seg.nsects; ++i) {
277       if (!walker->ReadBytes(&sec, sizeof(sec), offset))
278         return false;
279 
280       if (swap)
281         swap_section(&sec, 1, NXHostByteOrder());
282 
283       // sections of type S_ZEROFILL are "virtual" and contain no data
284       // in the file itself
285       if ((sec.flags & SECTION_TYPE) != S_ZEROFILL && sec.offset != 0)
286         macho_id->Update(walker, header_offset + sec.offset, sec.size);
287 
288       offset += sizeof(struct section);
289     }
290   } else if (cmd->cmd == LC_SEGMENT_64) {
291     struct segment_command_64 seg64;
292 
293     if (!walker->ReadBytes(&seg64, sizeof(seg64), offset))
294       return false;
295 
296     if (swap)
297       breakpad_swap_segment_command_64(&seg64, NXHostByteOrder());
298 
299     struct mach_header_64 header;
300     off_t header_offset;
301 
302     if (!walker->CurrentHeader(&header, &header_offset))
303       return false;
304 
305     // Process segments that have sections:
306     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
307     offset += sizeof(struct segment_command_64);
308     struct section_64 sec64;
309     for (unsigned long i = 0; i < seg64.nsects; ++i) {
310       if (!walker->ReadBytes(&sec64, sizeof(sec64), offset))
311         return false;
312 
313       if (swap)
314         breakpad_swap_section_64(&sec64, 1, NXHostByteOrder());
315 
316       // sections of type S_ZEROFILL are "virtual" and contain no data
317       // in the file itself
318       if ((sec64.flags & SECTION_TYPE) != S_ZEROFILL && sec64.offset != 0)
319         macho_id->Update(walker,
320                          header_offset + sec64.offset,
321                          (size_t)sec64.size);
322 
323       offset += sizeof(struct section_64);
324     }
325   }
326 
327   // Continue processing
328   return true;
329 }
330 
331 // static
UUIDWalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)332 bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
333                            bool swap, void *context) {
334   if (cmd->cmd == LC_UUID) {
335     struct breakpad_uuid_command *uuid_cmd =
336       (struct breakpad_uuid_command *)context;
337 
338     if (!walker->ReadBytes(uuid_cmd, sizeof(struct breakpad_uuid_command),
339                            offset))
340       return false;
341 
342     if (swap)
343       breakpad_swap_uuid_command(uuid_cmd, NXHostByteOrder());
344 
345     return false;
346   }
347 
348   // Continue processing
349   return true;
350 }
351 
352 // static
IDWalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)353 bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
354                          bool swap, void *context) {
355   if (cmd->cmd == LC_ID_DYLIB) {
356     struct dylib_command *dylib_cmd = (struct dylib_command *)context;
357 
358     if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset))
359       return false;
360 
361     if (swap)
362       swap_dylib_command(dylib_cmd, NXHostByteOrder());
363 
364     return false;
365   }
366 
367   // Continue processing
368   return true;
369 }
370 
371 }  // namespace MacFileUtilities
372