1 /*
2  * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
3  *
4  * @APPLE_LICENSE_HEADER_START@
5  *
6  * This file contains Original Code and/or Modifications of Original Code
7  * as defined in and that are subject to the Apple Public Source License
8  * Version 2.0 (the 'License'). You may not use this file except in
9  * compliance with the License. Please obtain a copy of the License at
10  * http://www.opensource.apple.com/apsl/ and read it before using this
11  * file.
12  *
13  * The Original Code and all software distributed under the License are
14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18  * Please see the License for the specific language governing rights and
19  * limitations under the License.
20  *
21  * @APPLE_LICENSE_HEADER_END@
22  */
23 /*
24  * Copyright (c) 1989, 1993
25  * The Regents of the University of California.  All rights reserved.
26  *
27  * Redistribution and use in source and binary forms, with or without
28  * modification, are permitted provided that the following conditions
29  * are met:
30  * 1. Redistributions of source code must retain the above copyright
31  *    notice, this list of conditions and the following disclaimer.
32  * 2. Redistributions in binary form must reproduce the above copyright
33  *    notice, this list of conditions and the following disclaimer in the
34  *    documentation and/or other materials provided with the distribution.
35  * 3. All advertising materials mentioning features or use of this software
36  *    must display the following acknowledgement:
37  *      This product includes software developed by the University of
38  *      California, Berkeley and its contributors.
39  * 4. Neither the name of the University nor the names of its contributors
40  *    may be used to endorse or promote products derived from this software
41  *    without specific prior written permission.
42  *
43  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
44  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
45  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
46  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
47  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
48  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
49  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
50  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
51  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
52  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
53  * SUCH DAMAGE.
54  */
55 
56 
57 /*
58  * This file was copied from libc/gen/nlist.c from Darwin's source code
59  * The version of nlist used as a base is from 10.5.2, libc-498
60  * http://www.opensource.apple.com/darwinsource/10.5.2/Libc-498/gen/nlist.c
61  *
62  * The full tarball is at:
63  * http://www.opensource.apple.com/darwinsource/tarballs/apsl/Libc-498.tar.gz
64  *
65  * I've modified it to be compatible with 64-bit images.
66 */
67 
68 #include "breakpad_nlist_64.h"
69 
70 #include <CoreFoundation/CoreFoundation.h>
71 #include <fcntl.h>
72 #include <mach-o/nlist.h>
73 #include <mach-o/loader.h>
74 #include <mach-o/fat.h>
75 #include <mach/mach.h>
76 #include <stdio.h>
77 #include <stdlib.h>
78 #include <sys/types.h>
79 #include <sys/uio.h>
80 #include <TargetConditionals.h>
81 #include <unistd.h>
82 
83 /* Stuff lifted from <a.out.h> and <sys/exec.h> since they are gone */
/*
 * Legacy a.out header found at the very beginning of such a file.
 * Field order and types are part of the on-disk layout this code reads.
 */
struct exec {
  unsigned short a_machtype;  /* machine type */
  unsigned short a_magic;     /* magic number */
  unsigned long  a_text;      /* text segment size */
  unsigned long  a_data;      /* initialized data size */
  unsigned long  a_bss;       /* uninitialized data size */
  unsigned long  a_syms;      /* symbol table size */
  unsigned long  a_entry;     /* entry point */
  unsigned long  a_trsize;    /* text relocation size */
  unsigned long  a_drsize;    /* data relocation size */
};
98 
/* a.out magic numbers (octal). */
#define OMAGIC  0407  /* old impure format */
#define NMAGIC  0410  /* read-only text */
#define ZMAGIC  0413  /* demand load format */

/* Non-zero when the header's magic is none of OMAGIC, NMAGIC or ZMAGIC. */
#define N_BADMAG(x)                                   \
  (!(((x).a_magic) == OMAGIC ||                       \
     ((x).a_magic) == NMAGIC ||                       \
     ((x).a_magic) == ZMAGIC))

/* File offset of the text: 0 for ZMAGIC, otherwise just past the header. */
#define N_TXTOFF(x)                                   \
  ((x).a_magic != ZMAGIC ? sizeof (struct exec) : 0)

/* File offset of the symbol table: text offset plus text, data and both
 * relocation areas. */
#define N_SYMOFF(x)                                   \
  (N_TXTOFF(x) + (x).a_text + (x).a_data + (x).a_trsize + (x).a_drsize)
109 
// Traits used to specialize the function templates in this file for
// 32-bit and 64-bit Mach-O files.  The primary template is empty; the
// members are supplied by the explicit specializations.
template <typename T>
struct MachBits {
};
114 
// Short aliases for the two symbol-table record types.
typedef struct nlist     nlist32;
typedef struct nlist_64  nlist64;
117 
118 template<>
119 struct MachBits<nlist32> {
120   typedef mach_header mach_header_type;
121   typedef uint32_t word_type;
122   static const uint32_t magic = MH_MAGIC;
123 };
124 
125 template<>
126 struct MachBits<nlist64> {
127   typedef mach_header_64 mach_header_type;
128   typedef uint64_t word_type;
129   static const uint32_t magic = MH_MAGIC_64;
130 };
131 
132 template<typename nlist_type>
133 int
134 __breakpad_fdnlist(int fd, nlist_type *list, const char **symbolNames,
135                    cpu_type_t cpu_type);
136 
137 /*
138  * nlist - retreive attributes from name list (string table version)
139  */
140 
141 template <typename nlist_type>
breakpad_nlist_common(const char * name,nlist_type * list,const char ** symbolNames,cpu_type_t cpu_type)142 int breakpad_nlist_common(const char *name,
143                           nlist_type *list,
144                           const char **symbolNames,
145                           cpu_type_t cpu_type) {
146   int fd = open(name, O_RDONLY, 0);
147   if (fd < 0)
148     return -1;
149   int n = __breakpad_fdnlist(fd, list, symbolNames, cpu_type);
150   close(fd);
151   return n;
152 }
153 
breakpad_nlist(const char * name,struct nlist * list,const char ** symbolNames,cpu_type_t cpu_type)154 int breakpad_nlist(const char *name,
155                    struct nlist *list,
156                    const char **symbolNames,
157                    cpu_type_t cpu_type) {
158   return breakpad_nlist_common(name, list, symbolNames, cpu_type);
159 }
160 
breakpad_nlist(const char * name,struct nlist_64 * list,const char ** symbolNames,cpu_type_t cpu_type)161 int breakpad_nlist(const char *name,
162                    struct nlist_64 *list,
163                    const char **symbolNames,
164                    cpu_type_t cpu_type) {
165   return breakpad_nlist_common(name, list, symbolNames, cpu_type);
166 }
167 
168 /* Note: __fdnlist() is called from kvm_nlist in libkvm's kvm.c */
169 
170 template<typename nlist_type>
__breakpad_fdnlist(int fd,nlist_type * list,const char ** symbolNames,cpu_type_t cpu_type)171 int __breakpad_fdnlist(int fd, nlist_type *list, const char **symbolNames,
172                        cpu_type_t cpu_type) {
173   typedef typename MachBits<nlist_type>::mach_header_type mach_header_type;
174   typedef typename MachBits<nlist_type>::word_type word_type;
175 
176   const uint32_t magic = MachBits<nlist_type>::magic;
177 
178   int maxlen = 500;
179   int nreq = 0;
180   for (nlist_type* q = list;
181        symbolNames[q-list] && symbolNames[q-list][0];
182        q++, nreq++) {
183 
184     q->n_type = 0;
185     q->n_value = 0;
186     q->n_desc = 0;
187     q->n_sect = 0;
188     q->n_un.n_strx = 0;
189   }
190 
191   struct exec buf;
192   if (read(fd, (char *)&buf, sizeof(buf)) != sizeof(buf) ||
193       (N_BADMAG(buf) && *((uint32_t *)&buf) != magic &&
194         CFSwapInt32BigToHost(*((uint32_t *)&buf)) != FAT_MAGIC &&
195        /* The following is the big-endian ppc64 check */
196        (*((uint32_t*)&buf)) != FAT_MAGIC)) {
197     return -1;
198   }
199 
200   /* Deal with fat file if necessary */
201   unsigned arch_offset = 0;
202   if (CFSwapInt32BigToHost(*((uint32_t *)&buf)) == FAT_MAGIC ||
203       /* The following is the big-endian ppc64 check */
204       *((unsigned int *)&buf) == FAT_MAGIC) {
205     /* Read in the fat header */
206     struct fat_header fh;
207     if (lseek(fd, 0, SEEK_SET) == -1) {
208       return -1;
209     }
210     if (read(fd, (char *)&fh, sizeof(fh)) != sizeof(fh)) {
211       return -1;
212     }
213 
214     /* Convert fat_narchs to host byte order */
215     fh.nfat_arch = CFSwapInt32BigToHost(fh.nfat_arch);
216 
217     /* Read in the fat archs */
218     struct fat_arch *fat_archs =
219         (struct fat_arch *)malloc(fh.nfat_arch * sizeof(struct fat_arch));
220     if (fat_archs == NULL) {
221       return -1;
222     }
223     if (read(fd, (char *)fat_archs,
224              sizeof(struct fat_arch) * fh.nfat_arch) !=
225         (ssize_t)(sizeof(struct fat_arch) * fh.nfat_arch)) {
226       free(fat_archs);
227       return -1;
228     }
229 
230     /*
231      * Convert archs to host byte ordering (a constraint of
232      * cpusubtype_getbestarch()
233      */
234     for (unsigned i = 0; i < fh.nfat_arch; i++) {
235       fat_archs[i].cputype =
236         CFSwapInt32BigToHost(fat_archs[i].cputype);
237       fat_archs[i].cpusubtype =
238         CFSwapInt32BigToHost(fat_archs[i].cpusubtype);
239       fat_archs[i].offset =
240         CFSwapInt32BigToHost(fat_archs[i].offset);
241       fat_archs[i].size =
242         CFSwapInt32BigToHost(fat_archs[i].size);
243       fat_archs[i].align =
244         CFSwapInt32BigToHost(fat_archs[i].align);
245     }
246 
247     struct fat_arch *fap = NULL;
248     for (unsigned i = 0; i < fh.nfat_arch; i++) {
249       if (fat_archs[i].cputype == cpu_type) {
250         fap = &fat_archs[i];
251         break;
252       }
253     }
254 
255     if (!fap) {
256       free(fat_archs);
257       return -1;
258     }
259     arch_offset = fap->offset;
260     free(fat_archs);
261 
262     /* Read in the beginning of the architecture-specific file */
263     if (lseek(fd, arch_offset, SEEK_SET) == -1) {
264       return -1;
265     }
266     if (read(fd, (char *)&buf, sizeof(buf)) != sizeof(buf)) {
267       return -1;
268     }
269   }
270 
271   off_t sa;  /* symbol address */
272   off_t ss;  /* start of strings */
273   register_t n;
274   if (*((unsigned int *)&buf) == magic) {
275     if (lseek(fd, arch_offset, SEEK_SET) == -1) {
276       return -1;
277     }
278     mach_header_type mh;
279     if (read(fd, (char *)&mh, sizeof(mh)) != sizeof(mh)) {
280       return -1;
281     }
282 
283     struct load_command *load_commands =
284         (struct load_command *)malloc(mh.sizeofcmds);
285     if (load_commands == NULL) {
286       return -1;
287     }
288     if (read(fd, (char *)load_commands, mh.sizeofcmds) !=
289         (ssize_t)mh.sizeofcmds) {
290       free(load_commands);
291       return -1;
292     }
293     struct symtab_command *stp = NULL;
294     struct load_command *lcp = load_commands;
295     // iterate through all load commands, looking for
296     // LC_SYMTAB load command
297     for (uint32_t i = 0; i < mh.ncmds; i++) {
298       if (lcp->cmdsize % sizeof(word_type) != 0 ||
299           lcp->cmdsize <= 0 ||
300           (char *)lcp + lcp->cmdsize >
301           (char *)load_commands + mh.sizeofcmds) {
302         free(load_commands);
303         return -1;
304       }
305       if (lcp->cmd == LC_SYMTAB) {
306         if (lcp->cmdsize !=
307             sizeof(struct symtab_command)) {
308           free(load_commands);
309           return -1;
310         }
311         stp = (struct symtab_command *)lcp;
312         break;
313       }
314       lcp = (struct load_command *)
315         ((char *)lcp + lcp->cmdsize);
316     }
317     if (stp == NULL) {
318       free(load_commands);
319       return -1;
320     }
321     // sa points to the beginning of the symbol table
322     sa = stp->symoff + arch_offset;
323     // ss points to the beginning of the string table
324     ss = stp->stroff + arch_offset;
325     // n is the number of bytes in the symbol table
326     // each symbol table entry is an nlist structure
327     n = stp->nsyms * sizeof(nlist_type);
328     free(load_commands);
329   } else {
330     sa = N_SYMOFF(buf) + arch_offset;
331     ss = sa + buf.a_syms + arch_offset;
332     n = buf.a_syms;
333   }
334 
335   if (lseek(fd, sa, SEEK_SET) == -1) {
336     return -1;
337   }
338 
339   // the algorithm here is to read the nlist entries in m-sized
340   // chunks into q.  q is then iterated over. for each entry in q,
341   // use the string table index(q->n_un.n_strx) to read the symbol
342   // name, then scan the nlist entries passed in by the user(via p),
343   // and look for a match
344   while (n) {
345     nlist_type space[BUFSIZ/sizeof (nlist_type)];
346     register_t m = sizeof (space);
347 
348     if (n < m)
349       m = n;
350     if (read(fd, (char *)space, m) != m)
351       break;
352     n -= m;
353     off_t savpos = lseek(fd, 0, SEEK_CUR);
354     if (savpos == -1) {
355       return -1;
356     }
357     for (nlist_type* q = space; (m -= sizeof(nlist_type)) >= 0; q++) {
358       char nambuf[BUFSIZ];
359 
360       if (q->n_un.n_strx == 0 || q->n_type & N_STAB)
361         continue;
362 
363       // seek to the location in the binary where the symbol
364       // name is stored & read it into memory
365       if (lseek(fd, ss+q->n_un.n_strx, SEEK_SET) == -1) {
366         return -1;
367       }
368       if (read(fd, nambuf, maxlen+1) == -1) {
369         return -1;
370       }
371       const char *s2 = nambuf;
372       for (nlist_type *p = list;
373            symbolNames[p-list] && symbolNames[p-list][0];
374            p++) {
375         // get the symbol name the user has passed in that
376         // corresponds to the nlist entry that we're looking at
377         const char *s1 = symbolNames[p - list];
378         while (*s1) {
379           if (*s1++ != *s2++)
380             goto cont;
381         }
382         if (*s2)
383           goto cont;
384 
385         p->n_value = q->n_value;
386         p->n_type = q->n_type;
387         p->n_desc = q->n_desc;
388         p->n_sect = q->n_sect;
389         p->n_un.n_strx = q->n_un.n_strx;
390         if (--nreq == 0)
391           return nreq;
392 
393         break;
394       cont:           ;
395       }
396     }
397     if (lseek(fd, savpos, SEEK_SET) == -1) {
398       return -1;
399     }
400   }
401   return nreq;
402 }
403