1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // macho_id.cc: Functions to gather identifying information from a macho file
31 //
32 // See macho_id.h for documentation
33 //
34 // Author: Dan Waylonis
35 
36 
37 #include <fcntl.h>
38 #include <mach-o/loader.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <sys/time.h>
43 #include <sys/types.h>
44 #include <unistd.h>
45 
46 #include "common/mac/macho_id.h"
47 #include "common/mac/macho_walker.h"
48 #include "common/mac/macho_utilities.h"
49 
50 namespace MacFileUtilities {
51 
52 using google_breakpad::MD5Init;
53 using google_breakpad::MD5Update;
54 using google_breakpad::MD5Final;
55 
MachoID(const char * path)56 MachoID::MachoID(const char *path)
57    : memory_(0),
58      memory_size_(0),
59      crc_(0),
60      md5_context_(),
61      update_function_(NULL) {
62   snprintf(path_, sizeof(path_), "%s", path);
63 }
64 
MachoID(const char * path,void * memory,size_t size)65 MachoID::MachoID(const char *path, void *memory, size_t size)
66    : memory_(memory),
67      memory_size_(size),
68      crc_(0),
69      md5_context_(),
70      update_function_(NULL) {
71   snprintf(path_, sizeof(path_), "%s", path);
72 }
73 
~MachoID()74 MachoID::~MachoID() {
75 }
76 
77 // The CRC info is from http://en.wikipedia.org/wiki/Adler-32
78 // With optimizations from http://www.zlib.net/
79 
80 // The largest prime smaller than 65536
81 #define MOD_ADLER 65521
82 // MAX_BLOCK is the largest n such that 255n(n+1)/2 + (n+1)(MAX_BLOCK-1) <= 2^32-1
83 #define MAX_BLOCK 5552
84 
UpdateCRC(unsigned char * bytes,size_t size)85 void MachoID::UpdateCRC(unsigned char *bytes, size_t size) {
86 // Unrolled loops for summing
87 #define DO1(buf,i)  {sum1 += (buf)[i]; sum2 += sum1;}
88 #define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
89 #define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
90 #define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
91 #define DO16(buf)   DO8(buf,0); DO8(buf,8);
92   // Split up the crc
93   uint32_t sum1 = crc_ & 0xFFFF;
94   uint32_t sum2 = (crc_ >> 16) & 0xFFFF;
95 
96   // Do large blocks
97   while (size >= MAX_BLOCK) {
98     size -= MAX_BLOCK;
99     int block_count = MAX_BLOCK / 16;
100     do {
101       DO16(bytes);
102       bytes += 16;
103     } while (--block_count);
104     sum1 %= MOD_ADLER;
105     sum2 %= MOD_ADLER;
106   }
107 
108   // Do remaining bytes
109   if (size) {
110     while (size >= 16) {
111       size -= 16;
112       DO16(bytes);
113       bytes += 16;
114     }
115     while (size--) {
116       sum1 += *bytes++;
117       sum2 += sum1;
118     }
119     sum1 %= MOD_ADLER;
120     sum2 %= MOD_ADLER;
121     crc_ = (sum2 << 16) | sum1;
122   }
123 }
124 
UpdateMD5(unsigned char * bytes,size_t size)125 void MachoID::UpdateMD5(unsigned char *bytes, size_t size) {
126   MD5Update(&md5_context_, bytes, static_cast<unsigned>(size));
127 }
128 
Update(MachoWalker * walker,off_t offset,size_t size)129 void MachoID::Update(MachoWalker *walker, off_t offset, size_t size) {
130   if (!update_function_ || !size)
131     return;
132 
133   // Read up to 4k bytes at a time
134   unsigned char buffer[4096];
135   size_t buffer_size;
136   off_t file_offset = offset;
137   while (size > 0) {
138     if (size > sizeof(buffer)) {
139       buffer_size = sizeof(buffer);
140       size -= buffer_size;
141     } else {
142       buffer_size = size;
143       size = 0;
144     }
145 
146     if (!walker->ReadBytes(buffer, buffer_size, file_offset))
147       return;
148 
149     (this->*update_function_)(buffer, buffer_size);
150     file_offset += buffer_size;
151   }
152 }
153 
UUIDCommand(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char bytes[16])154 bool MachoID::UUIDCommand(cpu_type_t cpu_type,
155                           cpu_subtype_t cpu_subtype,
156                           unsigned char bytes[16]) {
157   struct breakpad_uuid_command uuid_cmd;
158   uuid_cmd.cmd = 0;
159   if (!WalkHeader(cpu_type, cpu_subtype, UUIDWalkerCB, &uuid_cmd))
160     return false;
161 
162   // If we found the command, we'll have initialized the uuid_command
163   // structure
164   if (uuid_cmd.cmd == LC_UUID) {
165     memcpy(bytes, uuid_cmd.uuid, sizeof(uuid_cmd.uuid));
166     return true;
167   }
168 
169   return false;
170 }
171 
IDCommand(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char identifier[16])172 bool MachoID::IDCommand(cpu_type_t cpu_type,
173                         cpu_subtype_t cpu_subtype,
174                         unsigned char identifier[16]) {
175   struct dylib_command dylib_cmd;
176   dylib_cmd.cmd = 0;
177   if (!WalkHeader(cpu_type, cpu_subtype, IDWalkerCB, &dylib_cmd))
178     return false;
179 
180   // If we found the command, we'll have initialized the dylib_command
181   // structure
182   if (dylib_cmd.cmd == LC_ID_DYLIB) {
183     // Take the hashed filename, version, and compatability version bytes
184     // to form the first 12 bytes, pad the rest with zeros
185 
186     // create a crude hash of the filename to generate the first 4 bytes
187     identifier[0] = 0;
188     identifier[1] = 0;
189     identifier[2] = 0;
190     identifier[3] = 0;
191 
192     for (int j = 0, i = (int)strlen(path_)-1; i>=0 && path_[i]!='/'; ++j, --i) {
193       identifier[j%4] += path_[i];
194     }
195 
196     identifier[4] = (dylib_cmd.dylib.current_version >> 24) & 0xFF;
197     identifier[5] = (dylib_cmd.dylib.current_version >> 16) & 0xFF;
198     identifier[6] = (dylib_cmd.dylib.current_version >> 8) & 0xFF;
199     identifier[7] = dylib_cmd.dylib.current_version & 0xFF;
200     identifier[8] = (dylib_cmd.dylib.compatibility_version >> 24) & 0xFF;
201     identifier[9] = (dylib_cmd.dylib.compatibility_version >> 16) & 0xFF;
202     identifier[10] = (dylib_cmd.dylib.compatibility_version >> 8) & 0xFF;
203     identifier[11] = dylib_cmd.dylib.compatibility_version & 0xFF;
204     identifier[12] = (cpu_type >> 24) & 0xFF;
205     identifier[13] = (cpu_type >> 16) & 0xFF;
206     identifier[14] = (cpu_type >> 8) & 0xFF;
207     identifier[15] = cpu_type & 0xFF;
208 
209     return true;
210   }
211 
212   return false;
213 }
214 
Adler32(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype)215 uint32_t MachoID::Adler32(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype) {
216   update_function_ = &MachoID::UpdateCRC;
217   crc_ = 0;
218 
219   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
220     return 0;
221 
222   return crc_;
223 }
224 
MD5(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,unsigned char identifier[16])225 bool MachoID::MD5(cpu_type_t cpu_type, cpu_subtype_t cpu_subtype, unsigned char identifier[16]) {
226   update_function_ = &MachoID::UpdateMD5;
227 
228   MD5Init(&md5_context_);
229 
230   if (!WalkHeader(cpu_type, cpu_subtype, WalkerCB, this))
231     return false;
232 
233   MD5Final(identifier, &md5_context_);
234   return true;
235 }
236 
WalkHeader(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,MachoWalker::LoadCommandCallback callback,void * context)237 bool MachoID::WalkHeader(cpu_type_t cpu_type,
238                          cpu_subtype_t cpu_subtype,
239                          MachoWalker::LoadCommandCallback callback,
240                          void *context) {
241   if (memory_) {
242     MachoWalker walker(memory_, memory_size_, callback, context);
243     return walker.WalkHeader(cpu_type, cpu_subtype);
244   } else {
245     MachoWalker walker(path_, callback, context);
246     return walker.WalkHeader(cpu_type, cpu_subtype);
247   }
248 }
249 
250 // static
WalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)251 bool MachoID::WalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
252                        bool swap, void *context) {
253   MachoID *macho_id = (MachoID *)context;
254 
255   if (cmd->cmd == LC_SEGMENT) {
256     struct segment_command seg;
257 
258     if (!walker->ReadBytes(&seg, sizeof(seg), offset))
259       return false;
260 
261     if (swap)
262       breakpad_swap_segment_command(&seg);
263 
264     struct mach_header_64 header;
265     off_t header_offset;
266 
267     if (!walker->CurrentHeader(&header, &header_offset))
268       return false;
269 
270     // Process segments that have sections:
271     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
272     offset += sizeof(struct segment_command);
273     struct section sec;
274     for (unsigned long i = 0; i < seg.nsects; ++i) {
275       if (!walker->ReadBytes(&sec, sizeof(sec), offset))
276         return false;
277 
278       if (swap)
279         breakpad_swap_section(&sec, 1);
280 
281       // sections of type S_ZEROFILL are "virtual" and contain no data
282       // in the file itself
283       if ((sec.flags & SECTION_TYPE) != S_ZEROFILL && sec.offset != 0)
284         macho_id->Update(walker, header_offset + sec.offset, sec.size);
285 
286       offset += sizeof(struct section);
287     }
288   } else if (cmd->cmd == LC_SEGMENT_64) {
289     struct segment_command_64 seg64;
290 
291     if (!walker->ReadBytes(&seg64, sizeof(seg64), offset))
292       return false;
293 
294     if (swap)
295       breakpad_swap_segment_command_64(&seg64);
296 
297     struct mach_header_64 header;
298     off_t header_offset;
299 
300     if (!walker->CurrentHeader(&header, &header_offset))
301       return false;
302 
303     // Process segments that have sections:
304     // (e.g., __TEXT, __DATA, __IMPORT, __OBJC)
305     offset += sizeof(struct segment_command_64);
306     struct section_64 sec64;
307     for (unsigned long i = 0; i < seg64.nsects; ++i) {
308       if (!walker->ReadBytes(&sec64, sizeof(sec64), offset))
309         return false;
310 
311       if (swap)
312         breakpad_swap_section_64(&sec64, 1);
313 
314       // sections of type S_ZEROFILL are "virtual" and contain no data
315       // in the file itself
316       if ((sec64.flags & SECTION_TYPE) != S_ZEROFILL && sec64.offset != 0)
317         macho_id->Update(walker,
318                          header_offset + sec64.offset,
319                          (size_t)sec64.size);
320 
321       offset += sizeof(struct section_64);
322     }
323   }
324 
325   // Continue processing
326   return true;
327 }
328 
329 // static
UUIDWalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)330 bool MachoID::UUIDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
331                            bool swap, void *context) {
332   if (cmd->cmd == LC_UUID) {
333     struct breakpad_uuid_command *uuid_cmd =
334       (struct breakpad_uuid_command *)context;
335 
336     if (!walker->ReadBytes(uuid_cmd, sizeof(struct breakpad_uuid_command),
337                            offset))
338       return false;
339 
340     if (swap)
341       breakpad_swap_uuid_command(uuid_cmd);
342 
343     return false;
344   }
345 
346   // Continue processing
347   return true;
348 }
349 
350 // static
IDWalkerCB(MachoWalker * walker,load_command * cmd,off_t offset,bool swap,void * context)351 bool MachoID::IDWalkerCB(MachoWalker *walker, load_command *cmd, off_t offset,
352                          bool swap, void *context) {
353   if (cmd->cmd == LC_ID_DYLIB) {
354     struct dylib_command *dylib_cmd = (struct dylib_command *)context;
355 
356     if (!walker->ReadBytes(dylib_cmd, sizeof(struct dylib_command), offset))
357       return false;
358 
359     if (swap)
360       breakpad_swap_dylib_command(dylib_cmd);
361 
362     return false;
363   }
364 
365   // Continue processing
366   return true;
367 }
368 
369 }  // namespace MacFileUtilities
370