
/*--------------------------------------------------------------------*/
/*--- Platform-specific syscalls stuff.        syswrap-x86-linux.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#if defined(VGP_x86_linux)

/* TODO/FIXME jrs 20050207: assignments to the syscall return result
   in interrupted_syscall() need to be reviewed.  They don't seem
   to assign the shadow state.
*/

#include "pub_core_basics.h"
#include "pub_core_vki.h"
#include "pub_core_vkiscnums.h"
#include "pub_core_threadstate.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_debuglog.h"
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_libcproc.h"
#include "pub_core_libcsignal.h"
#include "pub_core_mallocfree.h"
#include "pub_core_options.h"
#include "pub_core_scheduler.h"
#include "pub_core_sigframe.h"      // For VG_(sigframe_destroy)()
#include "pub_core_signals.h"
#include "pub_core_syscall.h"
#include "pub_core_syswrap.h"
#include "pub_core_tooliface.h"

#include "priv_types_n_macros.h"
#include "priv_syswrap-generic.h"    /* for decls of generic wrappers */
#include "priv_syswrap-linux.h"      /* for decls of linux-ish wrappers */
#include "priv_syswrap-linux-variants.h" /* decls of linux variant wrappers */
#include "priv_syswrap-main.h"


/* ---------------------------------------------------------------------
   clone() handling
   ------------------------------------------------------------------ */

/* Call f(arg1), but first switch stacks, using 'stack' as the new
   stack, and use 'retaddr' as f's return-to address.  Also, clear all
   the integer registers before entering f. */
__attribute__((noreturn))
void ML_(call_on_new_stack_0_1) ( Addr stack,
                                  Addr retaddr,
                                  void (*f)(Word),
                                  Word arg1 );
//  4(%esp) == stack
//  8(%esp) == retaddr
// 12(%esp) == f
// 16(%esp) == arg1
asm(
".text\n"
".globl vgModuleLocal_call_on_new_stack_0_1\n"
"vgModuleLocal_call_on_new_stack_0_1:\n"
"   movl %esp, %esi\n"     // remember old stack pointer
"   movl 4(%esi), %esp\n"  // set stack
"   pushl 16(%esi)\n"      // arg1 to stack
"   pushl  8(%esi)\n"      // retaddr to stack
"   pushl 12(%esi)\n"      // f to stack
"   movl $0, %eax\n"       // zero all GP regs
"   movl $0, %ebx\n"
"   movl $0, %ecx\n"
"   movl $0, %edx\n"
"   movl $0, %esi\n"
"   movl $0, %edi\n"
"   movl $0, %ebp\n"
"   ret\n"                 // jump to f
"   ud2\n"                 // should never get here
".previous\n"
);
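
/* Note on the sequence above: the three pushes leave the new stack
   holding (from the top) f, retaddr, arg1, so the final "ret" pops f
   into %eip.  f therefore starts with a conventional-looking frame:
   its return address at 0(%esp) and arg1 at 4(%esp). */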


/*
        Perform a clone system call.  clone is strange because it has
        fork()-like return-twice semantics, so it needs special
        handling here.

        Upon entry, we have:

            int (fn)(void*)     in  0+FSZ(%esp)
            void* child_stack   in  4+FSZ(%esp)
            int flags           in  8+FSZ(%esp)
            void* arg           in 12+FSZ(%esp)
            pid_t* child_tid    in 16+FSZ(%esp)
            pid_t* parent_tid   in 20+FSZ(%esp)
            void* tls_ptr       in 24+FSZ(%esp)

        System call requires:

            int    $__NR_clone  in %eax
            int    flags        in %ebx
            void*  child_stack  in %ecx
            pid_t* parent_tid   in %edx
            pid_t* child_tid    in %edi
            void*  tls_ptr      in %esi

        Returns an Int encoded in the linux-x86 way, not a SysRes.
 */
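/* Note: %ebx, %edi and %esi are callee-saved in the i386 ABI but are
   needed below as syscall argument registers, so they are pushed on
   entry and popped again on the parent/error path.  Hence the frame
   size FSZ is the return address plus those three saved registers. */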
#define FSZ               "4+4+4+4" /* frame size = retaddr+ebx+edi+esi */
#define __NR_CLONE        VG_STRINGIFY(__NR_clone)
#define __NR_EXIT         VG_STRINGIFY(__NR_exit)

extern
Int do_syscall_clone_x86_linux ( Word (*fn)(void *),
                                 void* stack,
                                 Int   flags,
                                 void* arg,
                                 Int*  child_tid,
                                 Int*  parent_tid,
                                 vki_modify_ldt_t * );
asm(
".text\n"
".globl do_syscall_clone_x86_linux\n"
"do_syscall_clone_x86_linux:\n"
"        push    %ebx\n"
"        push    %edi\n"
"        push    %esi\n"

         /* set up child stack with function and arg */
"        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
"        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
"        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
"        lea     -8(%ecx), %ecx\n"          /* make space on stack */
"        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
"        movl    %eax, 0(%ecx)\n"           /*   fn */

         /* get other args to clone */
"        movl     8+"FSZ"(%esp), %ebx\n"    /* syscall arg1: flags */
"        movl    20+"FSZ"(%esp), %edx\n"    /* syscall arg3: parent tid * */
"        movl    16+"FSZ"(%esp), %edi\n"    /* syscall arg5: child tid * */
"        movl    24+"FSZ"(%esp), %esi\n"    /* syscall arg4: tls_ptr * */
"        movl    $"__NR_CLONE", %eax\n"
"        int     $0x80\n"                   /* clone() */
"        testl   %eax, %eax\n"              /* child if retval == 0 */
"        jnz     1f\n"

         /* CHILD - call thread function */
"        popl    %eax\n"
"        call    *%eax\n"                   /* call fn */

         /* exit with result */
"        movl    %eax, %ebx\n"              /* arg1: return value from fn */
"        movl    $"__NR_EXIT", %eax\n"
"        int     $0x80\n"

         /* Hm, exit returned */
"        ud2\n"

"1:\n"   /* PARENT or ERROR */
"        pop     %esi\n"
"        pop     %edi\n"
"        pop     %ebx\n"
"        ret\n"
".previous\n"
);

#undef FSZ
#undef __NR_CLONE
#undef __NR_EXIT


// forward declarations
static void setup_child ( ThreadArchState*, ThreadArchState*, Bool );
static SysRes sys_set_thread_area ( ThreadId, vki_modify_ldt_t* );

/*
   When a client clones, we need to keep track of the new thread.  This means:
   1. allocate a ThreadId+ThreadState+stack for the thread

   2. initialize the thread's new VCPU state

   3. create the thread using the same args as the client requested,
   but using the scheduler entrypoint for EIP, and a separate stack
   for ESP.
 */
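/* do_clone() below implements those three steps.  Note that the real
   (host) thread runs on a Valgrind-allocated stack obtained from
   ML_(allocstack); the client-supplied esp only becomes the child's
   *guest* ESP. */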
static SysRes do_clone ( ThreadId ptid,
                         UInt flags, Addr esp,
                         Int* parent_tidptr,
                         Int* child_tidptr,
                         vki_modify_ldt_t *tlsinfo)
{
   static const Bool debug = False;

   ThreadId     ctid = VG_(alloc_ThreadState)();
   ThreadState* ptst = VG_(get_ThreadState)(ptid);
   ThreadState* ctst = VG_(get_ThreadState)(ctid);
   UWord*       stack;
   SysRes       res;
   Int          eax;
   vki_sigset_t blockall, savedmask;

   VG_(sigfillset)(&blockall);

   vg_assert(VG_(is_running_thread)(ptid));
   vg_assert(VG_(is_valid_tid)(ctid));

   stack = (UWord*)ML_(allocstack)(ctid);
   if (stack == NULL) {
      res = VG_(mk_SysRes_Error)( VKI_ENOMEM );
      goto out;
   }

   /* Copy register state

      Both parent and child return to the same place, and the code
      following the clone syscall works out which is which, so we
      don't need to worry about it.

      The parent gets the child's new tid returned from clone, but the
      child gets 0.

      If the clone call specifies a NULL esp for the new thread, then
      it actually gets a copy of the parent's esp.
   */
   /* Note: the clone call done by the Quadrics Elan3 driver specifies
      clone flags of 0xF00, and it seems to rely on the assumption
      that the child inherits a copy of the parent's GDT.
      setup_child takes care of setting that up. */
   setup_child( &ctst->arch, &ptst->arch, True );

   /* Make sys_clone appear to have returned Success(0) in the
      child. */
   ctst->arch.vex.guest_EAX = 0;

   if (esp != 0)
      ctst->arch.vex.guest_ESP = esp;

   ctst->os_state.parent = ptid;

   /* inherit signal mask */
   ctst->sig_mask     = ptst->sig_mask;
   ctst->tmp_sig_mask = ptst->sig_mask;

   /* Start the child with its threadgroup being the same as the
      parent's.  This is so that any exit_group calls that happen
      after the child is created but before it sets its
      os_state.threadgroup field for real (in thread_wrapper in
      syswrap-linux.c), really kill the new thread.  a.k.a this avoids
      a race condition in which the thread is unkillable (via
      exit_group) because its threadgroup is not set.  The race window
      is probably only a few hundred or a few thousand cycles long.
      See #226116. */
   ctst->os_state.threadgroup = ptst->os_state.threadgroup;

   ML_(guess_and_register_stack) (esp, ctst);

   /* Assume the clone will succeed, and tell any tool that wants to
      know that this thread has come into existence.  We cannot defer
      it beyond this point because sys_set_thread_area, just below,
      causes tCheck to assert by making references to the new ThreadId
      if we don't state the new thread exists prior to that point.
      If the clone fails, we'll send out a ll_exit notification for it
      at the out: label below, to clean up. */
   vg_assert(VG_(owns_BigLock_LL)(ptid));
   VG_TRACK ( pre_thread_ll_create, ptid, ctid );

   if (flags & VKI_CLONE_SETTLS) {
      if (debug)
         VG_(printf)("clone child has SETTLS: tls info at %p: idx=%u "
                     "base=%#lx limit=%x; esp=%#x fs=%x gs=%x\n",
                     tlsinfo, tlsinfo->entry_number,
                     tlsinfo->base_addr, tlsinfo->limit,
                     ptst->arch.vex.guest_ESP,
                     ctst->arch.vex.guest_FS, ctst->arch.vex.guest_GS);
      res = sys_set_thread_area(ctid, tlsinfo);
      if (sr_isError(res))
         goto out;
   }

   flags &= ~VKI_CLONE_SETTLS;

   /* start the thread with everything blocked */
   VG_(sigprocmask)(VKI_SIG_SETMASK, &blockall, &savedmask);

   /* Create the new thread */
   eax = do_syscall_clone_x86_linux(
            ML_(start_thread_NORETURN), stack, flags, &VG_(threads)[ctid],
            child_tidptr, parent_tidptr, NULL
         );
   res = VG_(mk_SysRes_x86_linux)( eax );

   VG_(sigprocmask)(VKI_SIG_SETMASK, &savedmask, NULL);

  out:
   if (sr_isError(res)) {
      /* clone failed */
      VG_(cleanup_thread)(&ctst->arch);
      ctst->status = VgTs_Empty;
      /* oops.  Better tell the tool the thread exited in a hurry :-) */
      VG_TRACK( pre_thread_ll_exit, ctid );
   }

   return res;
}


/* ---------------------------------------------------------------------
   LDT/GDT simulation
   ------------------------------------------------------------------ */

/* Details of the LDT simulation
   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

   When a program runs natively, the linux kernel allows each *thread*
   in it to have its own LDT.  Almost all programs never do this --
   it's wildly unportable, after all -- and so the kernel never
   allocates the structure, which is just as well as an LDT occupies
   64k of memory (8192 entries of size 8 bytes).

   A thread may choose to modify its LDT entries, by doing the
   __NR_modify_ldt syscall.  In such a situation the kernel will then
   allocate an LDT structure for it.  Each LDT entry is basically a
   (base, limit) pair.  A virtual address in a specific segment is
   translated to a linear address by adding the segment's base value.
   In addition, the virtual address must not exceed the limit value.

   To use an LDT entry, a thread loads one of the segment registers
   (%cs, %ss, %ds, %es, %fs, %gs) with the index of the LDT entry (0
   .. 8191) it wants to use.  In fact, the required value is (index <<
   3) + 7, but that's not important right now.  Any normal instruction
   which includes an addressing mode can then be made relative to that
   LDT entry by prefixing the insn with a so-called segment-override
   prefix, a byte which indicates which of the 6 segment registers
   holds the LDT index.

   Now, a key constraint is that valgrind's address checks operate in
   terms of linear addresses.  So we have to explicitly translate
   virtual addrs into linear addrs, and that means doing a complete
   LDT simulation.

   Calls to modify_ldt are intercepted.  For each thread, we maintain
   an LDT (with the same normally-never-allocated optimisation that
   the kernel does).  This is updated as expected via calls to
   modify_ldt.

   When a thread does an amode calculation involving a segment
   override prefix, the relevant LDT entry for the thread is
   consulted.  It all works.

   There is a conceptual problem, which appears when switching back to
   native execution, either temporarily to pass syscalls to the
   kernel, or permanently, when debugging V.  Problem at such points
   is that it's pretty pointless to copy the simulated machine's
   segment registers to the real machine, because we'd also need to
   copy the simulated LDT into the real one, and that's prohibitively
   expensive.

   Fortunately it looks like no syscalls rely on the segment regs or
   LDT being correct, so we can get away with it.  Apart from that the
   simulation is pretty straightforward.  All 6 segment registers are
   tracked, although only %ds, %es, %fs and %gs are allowed as
   prefixes.  Perhaps it could be restricted even more than that -- I
   am not sure what is and isn't allowed in user-mode.
*/
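
/* Illustrative sketch only (the actual translation is carried out by
   VEX-generated code, not by anything in this file): for an access
   through a segment register holding selector 'sel', with this
   thread's LDT 'ldt', the simulation effectively computes

      VexGuestX86SegDescr ent = ldt[sel >> 3];  // strip the (idx<<3)+7 encoding
      vg_assert(vaddr <= limit_of(ent));        // segment limit check
      Addr linear = base_of(ent) + vaddr;       // linear address actually checked

   where 'base_of' and 'limit_of' are just stand-ins for reading the
   base and limit fields out of the descriptor. */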

/* Translate a struct modify_ldt_ldt_s to a VexGuestX86SegDescr, using
   the Linux kernel's logic (cut-n-paste of code in
   linux/kernel/ldt.c).  */

static
void translate_to_hw_format ( /* IN  */ vki_modify_ldt_t* inn,
                              /* OUT */ VexGuestX86SegDescr* out,
                                        Int oldmode )
{
   UInt entry_1, entry_2;
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   if (0)
      VG_(printf)("translate_to_hw_format: base %#lx, limit %u\n",
                  inn->base_addr, inn->limit );

   /* Allow LDTs to be cleared by the user. */
   if (inn->base_addr == 0 && inn->limit == 0) {
      if (oldmode ||
          (inn->contents == 0      &&
           inn->read_exec_only == 1   &&
           inn->seg_32bit == 0      &&
           inn->limit_in_pages == 0   &&
           inn->seg_not_present == 1   &&
           inn->useable == 0 )) {
         entry_1 = 0;
         entry_2 = 0;
         goto install;
      }
   }

   entry_1 = ((inn->base_addr & 0x0000ffff) << 16) |
             (inn->limit & 0x0ffff);
   entry_2 = (inn->base_addr & 0xff000000) |
             ((inn->base_addr & 0x00ff0000) >> 16) |
             (inn->limit & 0xf0000) |
             ((inn->read_exec_only ^ 1) << 9) |
             (inn->contents << 10) |
             ((inn->seg_not_present ^ 1) << 15) |
             (inn->seg_32bit << 22) |
             (inn->limit_in_pages << 23) |
             0x7000;
   if (!oldmode)
      entry_2 |= (inn->useable << 20);

   /* Install the new entry ...  */
  install:
   out->LdtEnt.Words.word1 = entry_1;
   out->LdtEnt.Words.word2 = entry_2;
}
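
/* For reference, the two words built above follow the standard x86
   descriptor layout: word1 holds limit[15:0] and base[15:0]; word2
   holds base[23:16], the type bits derived from read_exec_only and
   contents, the constant 0x7000 (S=1, DPL=3), the present bit,
   limit[19:16], the AVL ("useable") bit, the D/B ("seg_32bit") bit,
   the G ("limit_in_pages") bit and base[31:24]. */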

/* Create a zeroed-out GDT. */
static VexGuestX86SegDescr* alloc_zeroed_x86_GDT ( void )
{
   Int nbytes = VEX_GUEST_X86_GDT_NENT * sizeof(VexGuestX86SegDescr);
   return VG_(calloc)("di.syswrap-x86.azxG.1", nbytes, 1);
}

/* Create a zeroed-out LDT. */
static VexGuestX86SegDescr* alloc_zeroed_x86_LDT ( void )
{
   Int nbytes = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
   return VG_(calloc)("di.syswrap-x86.azxL.1", nbytes, 1);
}

/* Free up an LDT or GDT allocated by the above fns. */
static void free_LDT_or_GDT ( VexGuestX86SegDescr* dt )
{
   vg_assert(dt);
   VG_(free)(dt);
}

/* Copy contents between two existing LDTs. */
static void copy_LDT_from_to ( VexGuestX86SegDescr* src,
                               VexGuestX86SegDescr* dst )
{
   Int i;
   vg_assert(src);
   vg_assert(dst);
   for (i = 0; i < VEX_GUEST_X86_LDT_NENT; i++)
      dst[i] = src[i];
}

/* Copy contents between two existing GDTs. */
static void copy_GDT_from_to ( VexGuestX86SegDescr* src,
                               VexGuestX86SegDescr* dst )
{
   Int i;
   vg_assert(src);
   vg_assert(dst);
   for (i = 0; i < VEX_GUEST_X86_GDT_NENT; i++)
      dst[i] = src[i];
}

/* Free this thread's DTs, if it has any. */
static void deallocate_LGDTs_for_thread ( VexGuestX86State* vex )
{
   vg_assert(sizeof(HWord) == sizeof(void*));

   if (0)
      VG_(printf)("deallocate_LGDTs_for_thread: "
                  "ldt = 0x%lx, gdt = 0x%lx\n",
                  vex->guest_LDT, vex->guest_GDT );

   if (vex->guest_LDT != (HWord)NULL) {
      free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_LDT );
      vex->guest_LDT = (HWord)NULL;
   }

   if (vex->guest_GDT != (HWord)NULL) {
      free_LDT_or_GDT( (VexGuestX86SegDescr*)vex->guest_GDT );
      vex->guest_GDT = (HWord)NULL;
   }
}


/*
 * linux/kernel/ldt.c
 *
 * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
 * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
 */

/*
 * read_ldt() is not really atomic - this is not a problem since
 * synchronization of reads and writes done to the LDT has to be
 * assured by user-space anyway. Writes are atomic, to protect
 * the security checks done on new descriptors.
 */
static
SysRes read_ldt ( ThreadId tid, UChar* ptr, UInt bytecount )
{
   SysRes res;
   UInt   i, size;
   UChar* ldt;

   if (0)
      VG_(printf)("read_ldt: tid = %u, ptr = %p, bytecount = %u\n",
                  tid, ptr, bytecount );

   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   ldt = (UChar*)(VG_(threads)[tid].arch.vex.guest_LDT);
   res = VG_(mk_SysRes_Success)( 0 );
   if (ldt == NULL)
      /* LDT not allocated, meaning all entries are null */
      goto out;

   size = VEX_GUEST_X86_LDT_NENT * sizeof(VexGuestX86SegDescr);
   if (size > bytecount)
      size = bytecount;

   res = VG_(mk_SysRes_Success)( size );
   for (i = 0; i < size; i++)
      ptr[i] = ldt[i];

  out:
   return res;
}


static
SysRes write_ldt ( ThreadId tid, void* ptr, UInt bytecount, Int oldmode )
{
   SysRes res;
   VexGuestX86SegDescr* ldt;
   vki_modify_ldt_t* ldt_info;

   if (0)
      VG_(printf)("write_ldt: tid = %u, ptr = %p, "
                  "bytecount = %u, oldmode = %d\n",
                  tid, ptr, bytecount, oldmode );

   vg_assert(8 == sizeof(VexGuestX86SegDescr));
   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));

   ldt      = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_LDT;
   ldt_info = (vki_modify_ldt_t*)ptr;

   res = VG_(mk_SysRes_Error)( VKI_EINVAL );
   if (bytecount != sizeof(vki_modify_ldt_t))
      goto out;

   res = VG_(mk_SysRes_Error)( VKI_EINVAL );
   if (ldt_info->entry_number >= VEX_GUEST_X86_LDT_NENT)
      goto out;
   if (ldt_info->contents == 3) {
      if (oldmode)
         goto out;
      if (ldt_info->seg_not_present == 0)
         goto out;
   }

   /* If this thread doesn't have an LDT, we'd better allocate it
      now. */
   if (ldt == NULL) {
      ldt = alloc_zeroed_x86_LDT();
      VG_(threads)[tid].arch.vex.guest_LDT = (HWord)ldt;
   }

   /* Install the new entry ...  */
   translate_to_hw_format ( ldt_info, &ldt[ldt_info->entry_number], oldmode );
   res = VG_(mk_SysRes_Success)( 0 );

  out:
   return res;
}


static SysRes sys_modify_ldt ( ThreadId tid,
                               Int func, void* ptr, UInt bytecount )
{
   SysRes ret = VG_(mk_SysRes_Error)( VKI_ENOSYS );

   switch (func) {
   case 0:
      ret = read_ldt(tid, ptr, bytecount);
      break;
   case 1:
      ret = write_ldt(tid, ptr, bytecount, 1);
      break;
   case 2:
      VG_(unimplemented)("sys_modify_ldt: func == 2");
      /* god knows what this is about */
      /* ret = read_default_ldt(ptr, bytecount); */
      /*UNREACHED*/
      break;
   case 0x11:
      ret = write_ldt(tid, ptr, bytecount, 0);
      break;
   }
   return ret;
}


static SysRes sys_set_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
{
   Int                  idx;
   VexGuestX86SegDescr* gdt;

   vg_assert(8 == sizeof(VexGuestX86SegDescr));
   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));

   if (info == NULL)
      return VG_(mk_SysRes_Error)( VKI_EFAULT );

   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

   /* If the thread doesn't have a GDT, allocate it now. */
   if (!gdt) {
      gdt = alloc_zeroed_x86_GDT();
      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
   }

   idx = info->entry_number;

   if (idx == -1) {
      /* Find and use the first free entry.  Don't allocate entry
         zero, because the hardware will never do that, and apparently
         doing so confuses some code (perhaps stuff running on
         Wine). */
      for (idx = 1; idx < VEX_GUEST_X86_GDT_NENT; idx++) {
         if (gdt[idx].LdtEnt.Words.word1 == 0
             && gdt[idx].LdtEnt.Words.word2 == 0)
            break;
      }

      if (idx == VEX_GUEST_X86_GDT_NENT)
         return VG_(mk_SysRes_Error)( VKI_ESRCH );
   } else if (idx < 0 || idx == 0 || idx >= VEX_GUEST_X86_GDT_NENT) {
      /* Similarly, reject attempts to use GDT[0]. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   translate_to_hw_format(info, &gdt[idx], 0);

   VG_TRACK( pre_mem_write, Vg_CoreSysCall, tid,
             "set_thread_area(info->entry)",
             (Addr) & info->entry_number, sizeof(unsigned int) );
   info->entry_number = idx;
   VG_TRACK( post_mem_write, Vg_CoreSysCall, tid,
             (Addr) & info->entry_number, sizeof(unsigned int) );

   return VG_(mk_SysRes_Success)( 0 );
}


static SysRes sys_get_thread_area ( ThreadId tid, vki_modify_ldt_t* info )
{
   Int idx;
   VexGuestX86SegDescr* gdt;

   vg_assert(sizeof(HWord) == sizeof(VexGuestX86SegDescr*));
   vg_assert(8 == sizeof(VexGuestX86SegDescr));

   if (info == NULL)
      return VG_(mk_SysRes_Error)( VKI_EFAULT );

   idx = info->entry_number;

   if (idx < 0 || idx >= VEX_GUEST_X86_GDT_NENT)
      return VG_(mk_SysRes_Error)( VKI_EINVAL );

   gdt = (VexGuestX86SegDescr*)VG_(threads)[tid].arch.vex.guest_GDT;

   /* If the thread doesn't have a GDT, allocate it now. */
   if (!gdt) {
      gdt = alloc_zeroed_x86_GDT();
      VG_(threads)[tid].arch.vex.guest_GDT = (HWord)gdt;
   }

   info->base_addr = ( gdt[idx].LdtEnt.Bits.BaseHi << 24 ) |
                     ( gdt[idx].LdtEnt.Bits.BaseMid << 16 ) |
                     gdt[idx].LdtEnt.Bits.BaseLow;
   info->limit = ( gdt[idx].LdtEnt.Bits.LimitHi << 16 ) |
                   gdt[idx].LdtEnt.Bits.LimitLow;
   info->seg_32bit = gdt[idx].LdtEnt.Bits.Default_Big;
   info->contents = ( gdt[idx].LdtEnt.Bits.Type >> 2 ) & 0x3;
   info->read_exec_only = ( gdt[idx].LdtEnt.Bits.Type & 0x1 ) ^ 0x1;
   info->limit_in_pages = gdt[idx].LdtEnt.Bits.Granularity;
   info->seg_not_present = gdt[idx].LdtEnt.Bits.Pres ^ 0x1;
   info->useable = gdt[idx].LdtEnt.Bits.Sys;
   info->reserved = 0;

   return VG_(mk_SysRes_Success)( 0 );
}

/* ---------------------------------------------------------------------
   More thread stuff
   ------------------------------------------------------------------ */

void VG_(cleanup_thread) ( ThreadArchState* arch )
{
   /* Release arch-specific resources held by this thread. */
   /* On x86, we have to dump the LDT and GDT. */
   deallocate_LGDTs_for_thread( &arch->vex );
}


static void setup_child ( /*OUT*/ ThreadArchState *child,
                          /*IN*/  ThreadArchState *parent,
                          Bool inherit_parents_GDT )
{
   /* We inherit our parent's guest state. */
   child->vex = parent->vex;
   child->vex_shadow1 = parent->vex_shadow1;
   child->vex_shadow2 = parent->vex_shadow2;

   /* We inherit our parent's LDT. */
   if (parent->vex.guest_LDT == (HWord)NULL) {
      /* We hope this is the common case. */
      child->vex.guest_LDT = (HWord)NULL;
   } else {
      /* No luck .. we have to take a copy of the parent's. */
      child->vex.guest_LDT = (HWord)alloc_zeroed_x86_LDT();
      copy_LDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_LDT,
                        (VexGuestX86SegDescr*)child->vex.guest_LDT );
   }

   /* Either we start with an empty GDT (the usual case) or inherit a
      copy of our parents' one (Quadrics Elan3 driver -style clone
      only). */
   child->vex.guest_GDT = (HWord)NULL;

   if (inherit_parents_GDT && parent->vex.guest_GDT != (HWord)NULL) {
      child->vex.guest_GDT = (HWord)alloc_zeroed_x86_GDT();
      copy_GDT_from_to( (VexGuestX86SegDescr*)parent->vex.guest_GDT,
                        (VexGuestX86SegDescr*)child->vex.guest_GDT );
   }
}


/* ---------------------------------------------------------------------
   PRE/POST wrappers for x86/Linux-specific syscalls
   ------------------------------------------------------------------ */

#define PRE(name)       DEFN_PRE_TEMPLATE(x86_linux, name)
#define POST(name)      DEFN_POST_TEMPLATE(x86_linux, name)

/* Add prototypes for the wrappers declared here, so that gcc doesn't
   harass us for not having prototypes.  Really this is a kludge --
   the right thing to do is to make these wrappers 'static' since they
   aren't visible outside this file, but that requires even more macro
   magic. */
DECL_TEMPLATE(x86_linux, sys_stat64);
DECL_TEMPLATE(x86_linux, sys_fstatat64);
DECL_TEMPLATE(x86_linux, sys_fstat64);
DECL_TEMPLATE(x86_linux, sys_lstat64);
DECL_TEMPLATE(x86_linux, sys_clone);
DECL_TEMPLATE(x86_linux, old_mmap);
DECL_TEMPLATE(x86_linux, sys_mmap2);
DECL_TEMPLATE(x86_linux, sys_sigreturn);
DECL_TEMPLATE(x86_linux, sys_rt_sigreturn);
DECL_TEMPLATE(x86_linux, sys_modify_ldt);
DECL_TEMPLATE(x86_linux, sys_set_thread_area);
DECL_TEMPLATE(x86_linux, sys_get_thread_area);
DECL_TEMPLATE(x86_linux, sys_ptrace);
DECL_TEMPLATE(x86_linux, sys_sigsuspend);
DECL_TEMPLATE(x86_linux, old_select);
DECL_TEMPLATE(x86_linux, sys_vm86old);
DECL_TEMPLATE(x86_linux, sys_vm86);
DECL_TEMPLATE(x86_linux, sys_syscall223);

PRE(old_select)
{
   /* struct sel_arg_struct {
      unsigned long n;
      fd_set *inp, *outp, *exp;
      struct timeval *tvp;
      };
   */
   PRE_REG_READ1(long, "old_select", struct sel_arg_struct *, args);
   PRE_MEM_READ( "old_select(args)", ARG1, 5*sizeof(UWord) );
   *flags |= SfMayBlock;
   {
      UInt* arg_struct = (UInt*)ARG1;
      UInt a1, a2, a3, a4, a5;

      a1 = arg_struct[0];
      a2 = arg_struct[1];
      a3 = arg_struct[2];
      a4 = arg_struct[3];
      a5 = arg_struct[4];

      PRINT("old_select ( %d, %#x, %#x, %#x, %#x )", (Int)a1,a2,a3,a4,a5);
      if (a2 != (Addr)NULL)
         PRE_MEM_READ( "old_select(readfds)",   a2, a1/8 /* __FD_SETSIZE/8 */ );
      if (a3 != (Addr)NULL)
         PRE_MEM_READ( "old_select(writefds)",  a3, a1/8 /* __FD_SETSIZE/8 */ );
      if (a4 != (Addr)NULL)
         PRE_MEM_READ( "old_select(exceptfds)", a4, a1/8 /* __FD_SETSIZE/8 */ );
      if (a5 != (Addr)NULL)
         PRE_MEM_READ( "old_select(timeout)", a5, sizeof(struct vki_timeval) );
   }
}

PRE(sys_clone)
{
   UInt cloneflags;
   Bool badarg = False;

   PRINT("sys_clone ( %lx, %#lx, %#lx, %#lx, %#lx )",ARG1,ARG2,ARG3,ARG4,ARG5);
   PRE_REG_READ2(int, "clone",
                 unsigned long, flags,
                 void *, child_stack);

   if (ARG1 & VKI_CLONE_PARENT_SETTID) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA3("clone", int *, parent_tidptr);
      }
      PRE_MEM_WRITE("clone(parent_tidptr)", ARG3, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG3, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }
   if (ARG1 & VKI_CLONE_SETTLS) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA4("clone", vki_modify_ldt_t *, tlsinfo);
      }
      PRE_MEM_READ("clone(tlsinfo)", ARG4, sizeof(vki_modify_ldt_t));
      if (!VG_(am_is_valid_for_client)(ARG4, sizeof(vki_modify_ldt_t),
                                             VKI_PROT_READ)) {
         badarg = True;
      }
   }
   if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) {
      if (VG_(tdict).track_pre_reg_read) {
         PRA5("clone", int *, child_tidptr);
      }
      PRE_MEM_WRITE("clone(child_tidptr)", ARG5, sizeof(Int));
      if (!VG_(am_is_valid_for_client)(ARG5, sizeof(Int),
                                             VKI_PROT_WRITE)) {
         badarg = True;
      }
   }

   if (badarg) {
      SET_STATUS_Failure( VKI_EFAULT );
      return;
   }

   cloneflags = ARG1;

   if (!ML_(client_signal_OK)(ARG1 & VKI_CSIGNAL)) {
      SET_STATUS_Failure( VKI_EINVAL );
      return;
   }

   /* Be ultra-paranoid and filter out any clone-variants we don't understand:
      - ??? specifies clone flags of 0x100011
      - ??? specifies clone flags of 0x1200011.
      - NPTL specifies clone flags of 0x7D0F00.
      - The Quadrics Elan3 driver specifies clone flags of 0xF00.
      - Newer Quadrics Elan3 drivers with NPTL support specify 0x410F00.
      Everything else is rejected.
   */
   if (
        1 ||
        /* 11 Nov 05: for the time being, disable this ultra-paranoia.
           The switch below probably does a good enough job. */
          (cloneflags == 0x100011 || cloneflags == 0x1200011
                                  || cloneflags == 0x7D0F00
                                  || cloneflags == 0x790F00
                                  || cloneflags == 0x3D0F00
                                  || cloneflags == 0x410F00
                                  || cloneflags == 0xF00
                                  || cloneflags == 0xF21)) {
     /* OK */
   }
   else {
      /* Nah.  We don't like it.  Go away. */
      goto reject;
   }

   /* Only look at the flags we really care about */
   switch (cloneflags & (VKI_CLONE_VM | VKI_CLONE_FS
                         | VKI_CLONE_FILES | VKI_CLONE_VFORK)) {
   case VKI_CLONE_VM | VKI_CLONE_FS | VKI_CLONE_FILES:
      /* thread creation */
      SET_STATUS_from_SysRes(
         do_clone(tid,
                  ARG1,         /* flags */
                  (Addr)ARG2,   /* child ESP */
                  (Int *)ARG3,  /* parent_tidptr */
                  (Int *)ARG5,  /* child_tidptr */
                  (vki_modify_ldt_t *)ARG4)); /* set_tls */
      break;

   case VKI_CLONE_VFORK | VKI_CLONE_VM: /* vfork */
      /* FALLTHROUGH - assume vfork == fork */
      cloneflags &= ~(VKI_CLONE_VFORK | VKI_CLONE_VM);

   case 0: /* plain fork */
      SET_STATUS_from_SysRes(
         ML_(do_fork_clone)(tid,
                       cloneflags,      /* flags */
                       (Int *)ARG3,     /* parent_tidptr */
                       (Int *)ARG5));   /* child_tidptr */
      break;

   default:
   reject:
      /* should we just ENOSYS? */
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "Unsupported clone() flags: 0x%lx\n", ARG1);
      VG_(message)(Vg_UserMsg, "\n");
      VG_(message)(Vg_UserMsg, "The only supported clone() uses are:\n");
      VG_(message)(Vg_UserMsg, " - via a threads library (LinuxThreads or NPTL)\n");
      VG_(message)(Vg_UserMsg, " - via the implementation of fork or vfork\n");
      VG_(message)(Vg_UserMsg, " - for the Quadrics Elan3 user-space driver\n");
      VG_(unimplemented)
         ("Valgrind does not support general clone().");
   }

   if (SUCCESS) {
      if (ARG1 & VKI_CLONE_PARENT_SETTID)
         POST_MEM_WRITE(ARG3, sizeof(Int));
      if (ARG1 & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID))
         POST_MEM_WRITE(ARG5, sizeof(Int));

      /* Thread creation was successful; let the child have the chance
         to run */
      *flags |= SfYieldAfter;
   }
}

PRE(sys_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_sigreturn ( )");

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over
      sigreturn sequence's "popl %eax" and handler ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr)+sizeof(Word);
   /* XXX why does ESP change differ from rt_sigreturn case below? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it */
   VG_(sigframe_destroy)(tid, False);

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}

PRE(sys_rt_sigreturn)
{
   /* See comments on PRE(sys_rt_sigreturn) in syswrap-amd64-linux.c for
      an explanation of what follows. */

   ThreadState* tst;
   PRINT("sys_rt_sigreturn ( )");

   vg_assert(VG_(is_valid_tid)(tid));
   vg_assert(tid >= 1 && tid < VG_N_THREADS);
   vg_assert(VG_(is_running_thread)(tid));

   /* Adjust esp to point to start of frame; skip back up over handler
      ret addr */
   tst = VG_(get_ThreadState)(tid);
   tst->arch.vex.guest_ESP -= sizeof(Addr);
   /* XXX why does ESP change differ from sigreturn case above? */

   /* This is only so that the EIP is (might be) useful to report if
      something goes wrong in the sigreturn */
   ML_(fixup_guest_state_to_restart_syscall)(&tst->arch);

   /* Restore register state from frame and remove it */
   VG_(sigframe_destroy)(tid, True);

   /* Tell the driver not to update the guest state with the "result",
      and set a bogus result to keep it happy. */
   *flags |= SfNoWriteResult;
   SET_STATUS_Success(0);

   /* Check to see if any signals arose as a result of this. */
   *flags |= SfPollAfter;
}

PRE(sys_modify_ldt)
{
   PRINT("sys_modify_ldt ( %ld, %#lx, %lu )", SARG1, ARG2, ARG3);
   PRE_REG_READ3(int, "modify_ldt", int, func, void *, ptr,
                 unsigned long, bytecount);

   if (ARG1 == 0) {
      /* read the LDT into ptr */
      PRE_MEM_WRITE( "modify_ldt(ptr)", ARG2, ARG3 );
   }
   if (ARG1 == 1 || ARG1 == 0x11) {
      /* write the LDT with the entry pointed at by ptr */
      PRE_MEM_READ( "modify_ldt(ptr)", ARG2, sizeof(vki_modify_ldt_t) );
   }
   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_modify_ldt( tid, ARG1, (void*)ARG2, ARG3 ) );

   if (ARG1 == 0 && SUCCESS && RES > 0) {
      POST_MEM_WRITE( ARG2, RES );
   }
}

PRE(sys_set_thread_area)
{
   PRINT("sys_set_thread_area ( %#lx )", ARG1);
   PRE_REG_READ1(int, "set_thread_area", struct user_desc *, u_info)
   PRE_MEM_READ( "set_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );

   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_set_thread_area( tid, (void *)ARG1 ) );
}

PRE(sys_get_thread_area)
{
   PRINT("sys_get_thread_area ( %#lx )", ARG1);
   PRE_REG_READ1(int, "get_thread_area", struct user_desc *, u_info)
   PRE_MEM_WRITE( "get_thread_area(u_info)", ARG1, sizeof(vki_modify_ldt_t) );

   /* "do" the syscall ourselves; the kernel never sees it */
   SET_STATUS_from_SysRes( sys_get_thread_area( tid, (void *)ARG1 ) );

   if (SUCCESS) {
      POST_MEM_WRITE( ARG1, sizeof(vki_modify_ldt_t) );
   }
}

// Parts of this are x86-specific, but the *PEEK* cases are generic.
//
// ARG3 is only used for pointers into the traced process's address
// space and for offsets into the traced process's struct
// user_regs_struct. It is never a pointer into this process's memory
// space, and we should therefore not check anything it points to.
PRE(sys_ptrace)
{
   PRINT("sys_ptrace ( %ld, %ld, %#lx, %#lx )", SARG1, SARG2, ARG3, ARG4);
   PRE_REG_READ4(int, "ptrace",
                 long, request, long, pid, unsigned long, addr,
                 unsigned long, data);
   switch (ARG1) {
   case VKI_PTRACE_PEEKTEXT:
   case VKI_PTRACE_PEEKDATA:
   case VKI_PTRACE_PEEKUSR:
      PRE_MEM_WRITE( "ptrace(peek)", ARG4,
                     sizeof (long));
      break;
   case VKI_PTRACE_GETREGS:
      PRE_MEM_WRITE( "ptrace(getregs)", ARG4,
                     sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_GETFPREGS:
      PRE_MEM_WRITE( "ptrace(getfpregs)", ARG4,
                     sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_GETFPXREGS:
      PRE_MEM_WRITE( "ptrace(getfpxregs)", ARG4,
                     sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_GET_THREAD_AREA:
      PRE_MEM_WRITE( "ptrace(get_thread_area)", ARG4,
                     sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_SETREGS:
      PRE_MEM_READ( "ptrace(setregs)", ARG4,
                     sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_SETFPREGS:
      PRE_MEM_READ( "ptrace(setfpregs)", ARG4,
                     sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_SETFPXREGS:
      PRE_MEM_READ( "ptrace(setfpxregs)", ARG4,
                     sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_SET_THREAD_AREA:
      PRE_MEM_READ( "ptrace(set_thread_area)", ARG4,
                     sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_GETEVENTMSG:
      PRE_MEM_WRITE( "ptrace(geteventmsg)", ARG4, sizeof(unsigned long));
      break;
   case VKI_PTRACE_GETSIGINFO:
      PRE_MEM_WRITE( "ptrace(getsiginfo)", ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_SETSIGINFO:
      PRE_MEM_READ( "ptrace(setsiginfo)", ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_GETREGSET:
      ML_(linux_PRE_getregset)(tid, ARG3, ARG4);
      break;
   case VKI_PTRACE_SETREGSET:
      ML_(linux_PRE_setregset)(tid, ARG3, ARG4);
      break;
   default:
      break;
   }
}

POST(sys_ptrace)
{
   switch (ARG1) {
   case VKI_PTRACE_PEEKTEXT:
   case VKI_PTRACE_PEEKDATA:
   case VKI_PTRACE_PEEKUSR:
      POST_MEM_WRITE( ARG4, sizeof (long));
      break;
   case VKI_PTRACE_GETREGS:
      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_regs_struct));
      break;
   case VKI_PTRACE_GETFPREGS:
      POST_MEM_WRITE( ARG4, sizeof (struct vki_user_i387_struct));
      break;
   case VKI_PTRACE_GETFPXREGS:
      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_fxsr_struct) );
      break;
   case VKI_PTRACE_GET_THREAD_AREA:
      POST_MEM_WRITE( ARG4, sizeof(struct vki_user_desc) );
      break;
   case VKI_PTRACE_GETEVENTMSG:
      POST_MEM_WRITE( ARG4, sizeof(unsigned long));
      break;
   case VKI_PTRACE_GETSIGINFO:
      /* XXX: This is a simplification. Different parts of the
       * siginfo_t are valid depending on the type of signal.
       */
      POST_MEM_WRITE( ARG4, sizeof(vki_siginfo_t));
      break;
   case VKI_PTRACE_GETREGSET:
      ML_(linux_POST_getregset)(tid, ARG3, ARG4);
      break;
   default:
      break;
   }
}

PRE(old_mmap)
{
   /* struct mmap_arg_struct {
         unsigned long addr;
         unsigned long len;
         unsigned long prot;
         unsigned long flags;
         unsigned long fd;
         unsigned long offset;
   }; */
   UWord a1, a2, a3, a4, a5, a6;
   SysRes r;

   UWord* args = (UWord*)ARG1;
   PRE_REG_READ1(long, "old_mmap", struct mmap_arg_struct *, args);
   PRE_MEM_READ( "old_mmap(args)", (Addr)args, 6*sizeof(UWord) );

   a1 = args[1-1];
   a2 = args[2-1];
   a3 = args[3-1];
   a4 = args[4-1];
   a5 = args[5-1];
   a6 = args[6-1];

   PRINT("old_mmap ( %#lx, %lu, %ld, %ld, %ld, %ld )",
         a1, a2, (Word)a3, (Word)a4, (Word)a5, (Word)a6 );

   r = ML_(generic_PRE_sys_mmap)( tid, a1, a2, a3, a4, a5, (Off64T)a6 );
   SET_STATUS_from_SysRes(r);
}

PRE(sys_mmap2)
{
   SysRes r;

   // Exactly like old_mmap() except:
   //  - all 6 args are passed in regs, rather than in a memory-block.
   //  - the file offset is specified in pagesize units rather than bytes,
   //    so that it can be used for files bigger than 2^32 bytes.
   // pagesize or 4K-size units in offset?  For ppc32/64-linux, this is
   // 4K-sized.  Assert that the page size is 4K here for safety.
   vg_assert(VKI_PAGE_SIZE == 4096);
   PRINT("sys_mmap2 ( %#lx, %lu, %lu, %lu, %lu, %lu )",
         ARG1, ARG2, ARG3, ARG4, ARG5, ARG6 );
   PRE_REG_READ6(long, "mmap2",
                 unsigned long, start, unsigned long, length,
                 unsigned long, prot,  unsigned long, flags,
                 unsigned long, fd,    unsigned long, offset);

   r = ML_(generic_PRE_sys_mmap)( tid, ARG1, ARG2, ARG3, ARG4, ARG5,
                                       4096 * (Off64T)ARG6 );
   SET_STATUS_from_SysRes(r);
}

// XXX: lstat64/fstat64/stat64 are generic, but not necessarily
// applicable to every architecture -- I think only to 32-bit archs.
// We're going to need something like linux/core_os32.h for such
// things, eventually, I think.  --njn
PRE(sys_lstat64)
{
   PRINT("sys_lstat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
   PRE_REG_READ2(long, "lstat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "lstat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}

POST(sys_lstat64)
{
   vg_assert(SUCCESS);
   if (RES == 0) {
      POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
   }
}

PRE(sys_stat64)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   PRINT("sys_stat64 ( %#lx(%s), %#lx )", ARG1, (HChar*)ARG1, ARG2);
   PRE_REG_READ2(long, "stat64", char *, file_name, struct stat64 *, buf);
   PRE_MEM_RASCIIZ( "stat64(file_name)", ARG1 );
   PRE_MEM_WRITE( "stat64(buf)", ARG2, sizeof(struct vki_stat64) );
}

POST(sys_stat64)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}

PRE(sys_fstatat64)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   // ARG4 =  int flags;  Flags are or'ed together, therefore writing them
   // as a hex constant is more meaningful.
   PRINT("sys_fstatat64 ( %ld, %#lx(%s), %#lx, %#lx )",
         SARG1, ARG2, (HChar*)ARG2, ARG3, ARG4);
   PRE_REG_READ4(long, "fstatat64",
                 int, dfd, char *, file_name, struct stat64 *, buf, int, flags);
   PRE_MEM_RASCIIZ( "fstatat64(file_name)", ARG2 );
   PRE_MEM_WRITE( "fstatat64(buf)", ARG3, sizeof(struct vki_stat64) );
}

POST(sys_fstatat64)
{
   POST_MEM_WRITE( ARG3, sizeof(struct vki_stat64) );
}

PRE(sys_fstat64)
{
   PRINT("sys_fstat64 ( %lu, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(long, "fstat64", unsigned long, fd, struct stat64 *, buf);
   PRE_MEM_WRITE( "fstat64(buf)", ARG2, sizeof(struct vki_stat64) );
}

POST(sys_fstat64)
{
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat64) );
}

/* NB: arm-linux has a clone of this one, and ppc32-linux has an almost
   identical version. */
PRE(sys_sigsuspend)
{
   /* The C library interface to sigsuspend just takes a pointer to
      a signal mask but this system call has three arguments - the first
      two don't appear to be used by the kernel and are always passed as
      zero by glibc and the third is the first word of the signal mask
      so only 32 signals are supported.

      In fact glibc normally uses rt_sigsuspend if it is available as
      that takes a pointer to the signal mask so supports more signals.
    */
   *flags |= SfMayBlock;
   PRINT("sys_sigsuspend ( %ld, %ld, %lu )", SARG1, SARG2, ARG3 );
   PRE_REG_READ3(int, "sigsuspend",
                 int, history0, int, history1,
                 vki_old_sigset_t, mask);
}

PRE(sys_vm86old)
{
   PRINT("sys_vm86old ( %#lx )", ARG1);
   PRE_REG_READ1(int, "vm86old", struct vm86_struct *, info);
   PRE_MEM_WRITE( "vm86old(info)", ARG1, sizeof(struct vki_vm86_struct));
}

POST(sys_vm86old)
{
   POST_MEM_WRITE( ARG1, sizeof(struct vki_vm86_struct));
}

PRE(sys_vm86)
{
   PRINT("sys_vm86 ( %lu, %#lx )", ARG1, ARG2);
   PRE_REG_READ2(int, "vm86", unsigned long, fn, struct vm86plus_struct *, v86);
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      PRE_MEM_WRITE( "vm86(v86)", ARG2, sizeof(struct vki_vm86plus_struct));
}

POST(sys_vm86)
{
   if (ARG1 == VKI_VM86_ENTER || ARG1 == VKI_VM86_ENTER_NO_BYPASS)
      POST_MEM_WRITE( ARG2, sizeof(struct vki_vm86plus_struct));
}


/* ---------------------------------------------------------------
   PRE/POST wrappers for x86/Linux-variant specific syscalls
   ------------------------------------------------------------ */

PRE(sys_syscall223)
{
   Int err;

   /* 223 is used by sys_bproc.  If we're not on a declared bproc
      variant, fail in the usual way. */

   if (!KernelVariantiS(KernelVariant_bproc, VG_(clo_kernel_variant))) {
      PRINT("non-existent syscall! (syscall 223)");
      PRE_REG_READ0(long, "ni_syscall(223)");
      SET_STATUS_Failure( VKI_ENOSYS );
      return;
   }

   err = ML_(linux_variant_PRE_sys_bproc)( ARG1, ARG2, ARG3,
                                           ARG4, ARG5, ARG6 );
   if (err) {
      SET_STATUS_Failure( err );
      return;
   }
   /* Let it go through. */
   *flags |= SfMayBlock; /* who knows?  play safe. */
}

POST(sys_syscall223)
{
   ML_(linux_variant_POST_sys_bproc)( ARG1, ARG2, ARG3,
                                      ARG4, ARG5, ARG6 );
}

#undef PRE
#undef POST


/* ---------------------------------------------------------------------
   The x86/Linux syscall table
   ------------------------------------------------------------------ */

/* Add an x86-linux specific wrapper to a syscall table. */
#define PLAX_(sysno, name)    WRAPPER_ENTRY_X_(x86_linux, sysno, name)
#define PLAXY(sysno, name)    WRAPPER_ENTRY_XY(x86_linux, sysno, name)


// This table maps from __NR_xxx syscall numbers (from
// linux/include/asm-i386/unistd.h) to the appropriate PRE/POST sys_foo()
// wrappers on x86 (as per sys_call_table in linux/arch/i386/kernel/entry.S).
//
// For those syscalls not handled by Valgrind, the annotation indicates the
// arch/OS combination, eg. */* (generic), */Linux (Linux only), ?/?
// (unknown).

static SyscallTableEntry syscall_table[] = {
//zz    //   (restart_syscall)                             // 0
   GENX_(__NR_exit,              sys_exit),           // 1
   GENX_(__NR_fork,              sys_fork),           // 2
   GENXY(__NR_read,              sys_read),           // 3
   GENX_(__NR_write,             sys_write),          // 4

   GENXY(__NR_open,              sys_open),           // 5
   GENXY(__NR_close,             sys_close),          // 6
   GENXY(__NR_waitpid,           sys_waitpid),        // 7
   GENXY(__NR_creat,             sys_creat),          // 8
   GENX_(__NR_link,              sys_link),           // 9

   GENX_(__NR_unlink,            sys_unlink),         // 10
   GENX_(__NR_execve,            sys_execve),         // 11
   GENX_(__NR_chdir,             sys_chdir),          // 12
   GENXY(__NR_time,              sys_time),           // 13
   GENX_(__NR_mknod,             sys_mknod),          // 14

   GENX_(__NR_chmod,             sys_chmod),          // 15
//zz    LINX_(__NR_lchown,            sys_lchown16),       // 16
   GENX_(__NR_break,             sys_ni_syscall),     // 17
//zz    //   (__NR_oldstat,           sys_stat),           // 18 (obsolete)
   LINX_(__NR_lseek,             sys_lseek),          // 19

   GENX_(__NR_getpid,            sys_getpid),         // 20
   LINX_(__NR_mount,             sys_mount),          // 21
   LINX_(__NR_umount,            sys_oldumount),      // 22
   LINX_(__NR_setuid,            sys_setuid16),       // 23 ## P
   LINX_(__NR_getuid,            sys_getuid16),       // 24 ## P

   LINX_(__NR_stime,             sys_stime),          // 25 * (SVr4,SVID,X/OPEN)
   PLAXY(__NR_ptrace,            sys_ptrace),         // 26
   GENX_(__NR_alarm,             sys_alarm),          // 27
//zz    //   (__NR_oldfstat,          sys_fstat),          // 28 * L -- obsolete
   GENX_(__NR_pause,             sys_pause),          // 29

   LINX_(__NR_utime,             sys_utime),          // 30
   GENX_(__NR_stty,              sys_ni_syscall),     // 31
   GENX_(__NR_gtty,              sys_ni_syscall),     // 32
   GENX_(__NR_access,            sys_access),         // 33
   GENX_(__NR_nice,              sys_nice),           // 34

   GENX_(__NR_ftime,             sys_ni_syscall),     // 35
   GENX_(__NR_sync,              sys_sync),           // 36
   GENX_(__NR_kill,              sys_kill),           // 37
   GENX_(__NR_rename,            sys_rename),         // 38
   GENX_(__NR_mkdir,             sys_mkdir),          // 39

   GENX_(__NR_rmdir,             sys_rmdir),          // 40
   GENXY(__NR_dup,               sys_dup),            // 41
   LINXY(__NR_pipe,              sys_pipe),           // 42
   GENXY(__NR_times,             sys_times),          // 43
   GENX_(__NR_prof,              sys_ni_syscall),     // 44
//zz
   GENX_(__NR_brk,               sys_brk),            // 45
   LINX_(__NR_setgid,            sys_setgid16),       // 46
   LINX_(__NR_getgid,            sys_getgid16),       // 47
//zz    //   (__NR_signal,            sys_signal),         // 48 */* (ANSI C)
   LINX_(__NR_geteuid,           sys_geteuid16),      // 49

   LINX_(__NR_getegid,           sys_getegid16),      // 50
   GENX_(__NR_acct,              sys_acct),           // 51
   LINX_(__NR_umount2,           sys_umount),         // 52
   GENX_(__NR_lock,              sys_ni_syscall),     // 53
   LINXY(__NR_ioctl,             sys_ioctl),          // 54

   LINXY(__NR_fcntl,             sys_fcntl),          // 55
   GENX_(__NR_mpx,               sys_ni_syscall),     // 56
   GENX_(__NR_setpgid,           sys_setpgid),        // 57
   GENX_(__NR_ulimit,            sys_ni_syscall),     // 58
//zz    //   (__NR_oldolduname,       sys_olduname),       // 59 Linux -- obsolete
//zz
   GENX_(__NR_umask,             sys_umask),          // 60
   GENX_(__NR_chroot,            sys_chroot),         // 61
//zz    //   (__NR_ustat,             sys_ustat)           // 62 SVr4 -- deprecated
   GENXY(__NR_dup2,              sys_dup2),           // 63
   GENX_(__NR_getppid,           sys_getppid),        // 64

   GENX_(__NR_getpgrp,           sys_getpgrp),        // 65
   GENX_(__NR_setsid,            sys_setsid),         // 66
   LINXY(__NR_sigaction,         sys_sigaction),      // 67
//zz    //   (__NR_sgetmask,          sys_sgetmask),       // 68 */* (ANSI C)
//zz    //   (__NR_ssetmask,          sys_ssetmask),       // 69 */* (ANSI C)
//zz
   LINX_(__NR_setreuid,          sys_setreuid16),     // 70
   LINX_(__NR_setregid,          sys_setregid16),     // 71
   PLAX_(__NR_sigsuspend,        sys_sigsuspend),     // 72
   LINXY(__NR_sigpending,        sys_sigpending),     // 73
   GENX_(__NR_sethostname,       sys_sethostname),    // 74
//zz
   GENX_(__NR_setrlimit,         sys_setrlimit),      // 75
   GENXY(__NR_getrlimit,         sys_old_getrlimit),  // 76
   GENXY(__NR_getrusage,         sys_getrusage),      // 77
   GENXY(__NR_gettimeofday,      sys_gettimeofday),   // 78
   GENX_(__NR_settimeofday,      sys_settimeofday),   // 79

   LINXY(__NR_getgroups,         sys_getgroups16),    // 80
   LINX_(__NR_setgroups,         sys_setgroups16),    // 81
   PLAX_(__NR_select,            old_select),         // 82
   GENX_(__NR_symlink,           sys_symlink),        // 83
//zz    //   (__NR_oldlstat,          sys_lstat),          // 84 -- obsolete
//zz
   GENX_(__NR_readlink,          sys_readlink),       // 85
//zz    //   (__NR_uselib,            sys_uselib),         // 86 */Linux
//zz    //   (__NR_swapon,            sys_swapon),         // 87 */Linux
//zz    //   (__NR_reboot,            sys_reboot),         // 88 */Linux
//zz    //   (__NR_readdir,           old_readdir),        // 89 -- superseded
//zz
   PLAX_(__NR_mmap,              old_mmap),           // 90
   GENXY(__NR_munmap,            sys_munmap),         // 91
   GENX_(__NR_truncate,          sys_truncate),       // 92
   GENX_(__NR_ftruncate,         sys_ftruncate),      // 93
   GENX_(__NR_fchmod,            sys_fchmod),         // 94

   LINX_(__NR_fchown,            sys_fchown16),       // 95
   GENX_(__NR_getpriority,       sys_getpriority),    // 96
   GENX_(__NR_setpriority,       sys_setpriority),    // 97
   GENX_(__NR_profil,            sys_ni_syscall),     // 98
   GENXY(__NR_statfs,            sys_statfs),         // 99

   GENXY(__NR_fstatfs,           sys_fstatfs),        // 100
   LINX_(__NR_ioperm,            sys_ioperm),         // 101
   LINXY(__NR_socketcall,        sys_socketcall),     // 102 x86/Linux-only
   LINXY(__NR_syslog,            sys_syslog),         // 103
   GENXY(__NR_setitimer,         sys_setitimer),      // 104

   GENXY(__NR_getitimer,         sys_getitimer),      // 105
   GENXY(__NR_stat,              sys_newstat),        // 106
   GENXY(__NR_lstat,             sys_newlstat),       // 107
   GENXY(__NR_fstat,             sys_newfstat),       // 108
//zz    //   (__NR_olduname,          sys_uname),          // 109 -- obsolete
//zz
   GENX_(__NR_iopl,              sys_iopl),           // 110
   LINX_(__NR_vhangup,           sys_vhangup),        // 111
   GENX_(__NR_idle,              sys_ni_syscall),     // 112
   PLAXY(__NR_vm86old,           sys_vm86old),        // 113 x86/Linux-only
   GENXY(__NR_wait4,             sys_wait4),          // 114
//zz
//zz    //   (__NR_swapoff,           sys_swapoff),        // 115 */Linux
   LINXY(__NR_sysinfo,           sys_sysinfo),        // 116
   LINXY(__NR_ipc,               sys_ipc),            // 117
   GENX_(__NR_fsync,             sys_fsync),          // 118
   PLAX_(__NR_sigreturn,         sys_sigreturn),      // 119 ?/Linux

   PLAX_(__NR_clone,             sys_clone),          // 120
//zz    //   (__NR_setdomainname,     sys_setdomainname),  // 121 */*(?)
   GENXY(__NR_uname,             sys_newuname),       // 122
   PLAX_(__NR_modify_ldt,        sys_modify_ldt),     // 123
   LINXY(__NR_adjtimex,          sys_adjtimex),       // 124

   GENXY(__NR_mprotect,          sys_mprotect),       // 125
   LINXY(__NR_sigprocmask,       sys_sigprocmask),    // 126
//zz    // Nb: create_module() was removed 2.4-->2.6
   GENX_(__NR_create_module,     sys_ni_syscall),     // 127
   LINX_(__NR_init_module,       sys_init_module),    // 128
   LINX_(__NR_delete_module,     sys_delete_module),  // 129
//zz
//zz    // Nb: get_kernel_syms() was removed 2.4-->2.6
   GENX_(__NR_get_kernel_syms,   sys_ni_syscall),     // 130
   LINX_(__NR_quotactl,          sys_quotactl),       // 131
   GENX_(__NR_getpgid,           sys_getpgid),        // 132
   GENX_(__NR_fchdir,            sys_fchdir),         // 133
//zz    //   (__NR_bdflush,           sys_bdflush),        // 134 */Linux
//zz
//zz    //   (__NR_sysfs,             sys_sysfs),          // 135 SVr4
   LINX_(__NR_personality,       sys_personality),    // 136
   GENX_(__NR_afs_syscall,       sys_ni_syscall),     // 137
   LINX_(__NR_setfsuid,          sys_setfsuid16),     // 138
   LINX_(__NR_setfsgid,          sys_setfsgid16),     // 139

   LINXY(__NR__llseek,           sys_llseek),         // 140
   GENXY(__NR_getdents,          sys_getdents),       // 141
   GENX_(__NR__newselect,        sys_select),         // 142
   GENX_(__NR_flock,             sys_flock),          // 143
   GENX_(__NR_msync,             sys_msync),          // 144

   GENXY(__NR_readv,             sys_readv),          // 145
   GENX_(__NR_writev,            sys_writev),         // 146
   GENX_(__NR_getsid,            sys_getsid),         // 147
   GENX_(__NR_fdatasync,         sys_fdatasync),      // 148
   LINXY(__NR__sysctl,           sys_sysctl),         // 149

   GENX_(__NR_mlock,             sys_mlock),          // 150
   GENX_(__NR_munlock,           sys_munlock),        // 151
   GENX_(__NR_mlockall,          sys_mlockall),       // 152
   LINX_(__NR_munlockall,        sys_munlockall),     // 153
   LINXY(__NR_sched_setparam,    sys_sched_setparam), // 154

   LINXY(__NR_sched_getparam,         sys_sched_getparam),        // 155
   LINX_(__NR_sched_setscheduler,     sys_sched_setscheduler),    // 156
   LINX_(__NR_sched_getscheduler,     sys_sched_getscheduler),    // 157
   LINX_(__NR_sched_yield,            sys_sched_yield),           // 158
   LINX_(__NR_sched_get_priority_max, sys_sched_get_priority_max),// 159

   LINX_(__NR_sched_get_priority_min, sys_sched_get_priority_min),// 160
   LINXY(__NR_sched_rr_get_interval,  sys_sched_rr_get_interval), // 161
   GENXY(__NR_nanosleep,         sys_nanosleep),      // 162
   GENX_(__NR_mremap,            sys_mremap),         // 163
   LINX_(__NR_setresuid,         sys_setresuid16),    // 164

   LINXY(__NR_getresuid,         sys_getresuid16),    // 165
   PLAXY(__NR_vm86,              sys_vm86),           // 166 x86/Linux-only
   GENX_(__NR_query_module,      sys_ni_syscall),     // 167
   GENXY(__NR_poll,              sys_poll),           // 168
//zz    //   (__NR_nfsservctl,        sys_nfsservctl),     // 169 */Linux
//zz
   LINX_(__NR_setresgid,         sys_setresgid16),    // 170
   LINXY(__NR_getresgid,         sys_getresgid16),    // 171
   LINXY(__NR_prctl,             sys_prctl),          // 172
   PLAX_(__NR_rt_sigreturn,      sys_rt_sigreturn),   // 173 x86/Linux only?
   LINXY(__NR_rt_sigaction,      sys_rt_sigaction),   // 174

   LINXY(__NR_rt_sigprocmask,    sys_rt_sigprocmask), // 175
   LINXY(__NR_rt_sigpending,     sys_rt_sigpending),  // 176
   LINXY(__NR_rt_sigtimedwait,   sys_rt_sigtimedwait),// 177
   LINXY(__NR_rt_sigqueueinfo,   sys_rt_sigqueueinfo),// 178
   LINX_(__NR_rt_sigsuspend,     sys_rt_sigsuspend),  // 179

   GENXY(__NR_pread64,           sys_pread64),        // 180
   GENX_(__NR_pwrite64,          sys_pwrite64),       // 181
   LINX_(__NR_chown,             sys_chown16),        // 182
   GENXY(__NR_getcwd,            sys_getcwd),         // 183
   LINXY(__NR_capget,            sys_capget),         // 184

   LINX_(__NR_capset,            sys_capset),         // 185
   GENXY(__NR_sigaltstack,       sys_sigaltstack),    // 186
   LINXY(__NR_sendfile,          sys_sendfile),       // 187
   GENXY(__NR_getpmsg,           sys_getpmsg),        // 188
   GENX_(__NR_putpmsg,           sys_putpmsg),        // 189

   // Nb: we treat vfork as fork
   GENX_(__NR_vfork,             sys_fork),           // 190
   GENXY(__NR_ugetrlimit,        sys_getrlimit),      // 191
   PLAX_(__NR_mmap2,             sys_mmap2),          // 192
   GENX_(__NR_truncate64,        sys_truncate64),     // 193
   GENX_(__NR_ftruncate64,       sys_ftruncate64),    // 194

   PLAXY(__NR_stat64,            sys_stat64),         // 195
   PLAXY(__NR_lstat64,           sys_lstat64),        // 196
   PLAXY(__NR_fstat64,           sys_fstat64),        // 197
   GENX_(__NR_lchown32,          sys_lchown),         // 198
   GENX_(__NR_getuid32,          sys_getuid),         // 199

   GENX_(__NR_getgid32,          sys_getgid),         // 200
   GENX_(__NR_geteuid32,         sys_geteuid),        // 201
   GENX_(__NR_getegid32,         sys_getegid),        // 202
   GENX_(__NR_setreuid32,        sys_setreuid),       // 203
   GENX_(__NR_setregid32,        sys_setregid),       // 204

   GENXY(__NR_getgroups32,       sys_getgroups),      // 205
   GENX_(__NR_setgroups32,       sys_setgroups),      // 206
   GENX_(__NR_fchown32,          sys_fchown),         // 207
   LINX_(__NR_setresuid32,       sys_setresuid),      // 208
   LINXY(__NR_getresuid32,       sys_getresuid),      // 209

   LINX_(__NR_setresgid32,       sys_setresgid),      // 210
   LINXY(__NR_getresgid32,       sys_getresgid),      // 211
   GENX_(__NR_chown32,           sys_chown),          // 212
   GENX_(__NR_setuid32,          sys_setuid),         // 213
   GENX_(__NR_setgid32,          sys_setgid),         // 214

   LINX_(__NR_setfsuid32,        sys_setfsuid),       // 215
   LINX_(__NR_setfsgid32,        sys_setfsgid),       // 216
   LINX_(__NR_pivot_root,        sys_pivot_root),     // 217
   GENXY(__NR_mincore,           sys_mincore),        // 218
   GENX_(__NR_madvise,           sys_madvise),        // 219

   GENXY(__NR_getdents64,        sys_getdents64),     // 220
   LINXY(__NR_fcntl64,           sys_fcntl64),        // 221
   GENX_(222,                    sys_ni_syscall),     // 222
   PLAXY(223,                    sys_syscall223),     // 223 // sys_bproc?
   LINX_(__NR_gettid,            sys_gettid),         // 224

   LINX_(__NR_readahead,         sys_readahead),      // 225 */Linux
   LINX_(__NR_setxattr,          sys_setxattr),       // 226
   LINX_(__NR_lsetxattr,         sys_lsetxattr),      // 227
   LINX_(__NR_fsetxattr,         sys_fsetxattr),      // 228
   LINXY(__NR_getxattr,          sys_getxattr),       // 229

   LINXY(__NR_lgetxattr,         sys_lgetxattr),      // 230
   LINXY(__NR_fgetxattr,         sys_fgetxattr),      // 231
   LINXY(__NR_listxattr,         sys_listxattr),      // 232
   LINXY(__NR_llistxattr,        sys_llistxattr),     // 233
   LINXY(__NR_flistxattr,        sys_flistxattr),     // 234

   LINX_(__NR_removexattr,       sys_removexattr),    // 235
   LINX_(__NR_lremovexattr,      sys_lremovexattr),   // 236
   LINX_(__NR_fremovexattr,      sys_fremovexattr),   // 237
   LINXY(__NR_tkill,             sys_tkill),          // 238 */Linux
   LINXY(__NR_sendfile64,        sys_sendfile64),     // 239

   LINXY(__NR_futex,             sys_futex),             // 240
   LINX_(__NR_sched_setaffinity, sys_sched_setaffinity), // 241
   LINXY(__NR_sched_getaffinity, sys_sched_getaffinity), // 242
   PLAX_(__NR_set_thread_area,   sys_set_thread_area),   // 243
   PLAX_(__NR_get_thread_area,   sys_get_thread_area),   // 244

   LINXY(__NR_io_setup,          sys_io_setup),       // 245
   LINX_(__NR_io_destroy,        sys_io_destroy),     // 246
   LINXY(__NR_io_getevents,      sys_io_getevents),   // 247
   LINX_(__NR_io_submit,         sys_io_submit),      // 248
   LINXY(__NR_io_cancel,         sys_io_cancel),      // 249

   LINX_(__NR_fadvise64,         sys_fadvise64),      // 250 */(Linux?)
   GENX_(251,                    sys_ni_syscall),     // 251
   LINX_(__NR_exit_group,        sys_exit_group),     // 252
   LINXY(__NR_lookup_dcookie,    sys_lookup_dcookie), // 253
   LINXY(__NR_epoll_create,      sys_epoll_create),   // 254

   LINX_(__NR_epoll_ctl,         sys_epoll_ctl),         // 255
   LINXY(__NR_epoll_wait,        sys_epoll_wait),        // 256
//zz    //   (__NR_remap_file_pages,  sys_remap_file_pages),  // 257 */Linux
   LINX_(__NR_set_tid_address,   sys_set_tid_address),   // 258
   LINXY(__NR_timer_create,      sys_timer_create),      // 259

   LINXY(__NR_timer_settime,     sys_timer_settime),  // (timer_create+1)
   LINXY(__NR_timer_gettime,     sys_timer_gettime),  // (timer_create+2)
   LINX_(__NR_timer_getoverrun,  sys_timer_getoverrun),//(timer_create+3)
   LINX_(__NR_timer_delete,      sys_timer_delete),   // (timer_create+4)
   LINX_(__NR_clock_settime,     sys_clock_settime),  // (timer_create+5)

   LINXY(__NR_clock_gettime,     sys_clock_gettime),  // (timer_create+6)
   LINXY(__NR_clock_getres,      sys_clock_getres),   // (timer_create+7)
   LINXY(__NR_clock_nanosleep,   sys_clock_nanosleep),// (timer_create+8) */*
   GENXY(__NR_statfs64,          sys_statfs64),       // 268
   GENXY(__NR_fstatfs64,         sys_fstatfs64),      // 269

   LINX_(__NR_tgkill,            sys_tgkill),         // 270 */Linux
   GENX_(__NR_utimes,            sys_utimes),         // 271
   LINX_(__NR_fadvise64_64,      sys_fadvise64_64),   // 272 */(Linux?)
   GENX_(__NR_vserver,           sys_ni_syscall),     // 273
   LINX_(__NR_mbind,             sys_mbind),          // 274 ?/?

   LINXY(__NR_get_mempolicy,     sys_get_mempolicy),  // 275 ?/?
   LINX_(__NR_set_mempolicy,     sys_set_mempolicy),  // 276 ?/?
   LINXY(__NR_mq_open,           sys_mq_open),        // 277
   LINX_(__NR_mq_unlink,         sys_mq_unlink),      // (mq_open+1)
   LINX_(__NR_mq_timedsend,      sys_mq_timedsend),   // (mq_open+2)

   LINXY(__NR_mq_timedreceive,   sys_mq_timedreceive),// (mq_open+3)
   LINX_(__NR_mq_notify,         sys_mq_notify),      // (mq_open+4)
   LINXY(__NR_mq_getsetattr,     sys_mq_getsetattr),  // (mq_open+5)
   GENX_(__NR_sys_kexec_load,    sys_ni_syscall),     // 283
   LINXY(__NR_waitid,            sys_waitid),         // 284

   GENX_(285,                    sys_ni_syscall),     // 285
   LINX_(__NR_add_key,           sys_add_key),        // 286
   LINX_(__NR_request_key,       sys_request_key),    // 287
   LINXY(__NR_keyctl,            sys_keyctl),         // 288
   LINX_(__NR_ioprio_set,        sys_ioprio_set),     // 289

   LINX_(__NR_ioprio_get,        sys_ioprio_get),     // 290
   LINX_(__NR_inotify_init,      sys_inotify_init),   // 291
   LINX_(__NR_inotify_add_watch, sys_inotify_add_watch), // 292
   LINX_(__NR_inotify_rm_watch,  sys_inotify_rm_watch),  // 293
//   LINX_(__NR_migrate_pages,     sys_migrate_pages),    // 294

   LINXY(__NR_openat,            sys_openat),            // 295
   LINX_(__NR_mkdirat,           sys_mkdirat),           // 296
   LINX_(__NR_mknodat,           sys_mknodat),           // 297
   LINX_(__NR_fchownat,          sys_fchownat),          // 298
   LINX_(__NR_futimesat,         sys_futimesat),         // 299

   PLAXY(__NR_fstatat64,         sys_fstatat64),         // 300
   LINX_(__NR_unlinkat,          sys_unlinkat),          // 301
   LINX_(__NR_renameat,          sys_renameat),          // 302
   LINX_(__NR_linkat,            sys_linkat),            // 303
   LINX_(__NR_symlinkat,         sys_symlinkat),         // 304

   LINX_(__NR_readlinkat,        sys_readlinkat),        // 305
   LINX_(__NR_fchmodat,          sys_fchmodat),          // 306
   LINX_(__NR_faccessat,         sys_faccessat),         // 307
   LINX_(__NR_pselect6,          sys_pselect6),          // 308
   LINXY(__NR_ppoll,             sys_ppoll),             // 309

   LINX_(__NR_unshare,           sys_unshare),           // 310
   LINX_(__NR_set_robust_list,   sys_set_robust_list),   // 311
   LINXY(__NR_get_robust_list,   sys_get_robust_list),   // 312
   LINX_(__NR_splice,            sys_splice),            // 313
   LINX_(__NR_sync_file_range,   sys_sync_file_range),   // 314

   LINX_(__NR_tee,               sys_tee),               // 315
   LINXY(__NR_vmsplice,          sys_vmsplice),          // 316
   LINXY(__NR_move_pages,        sys_move_pages),        // 317
   LINXY(__NR_getcpu,            sys_getcpu),            // 318
   LINXY(__NR_epoll_pwait,       sys_epoll_pwait),       // 319

   LINX_(__NR_utimensat,         sys_utimensat),        // 320
   LINXY(__NR_signalfd,          sys_signalfd),         // 321
   LINXY(__NR_timerfd_create,    sys_timerfd_create),   // 322
   LINXY(__NR_eventfd,           sys_eventfd),          // 323
   LINX_(__NR_fallocate,         sys_fallocate),        // 324

   LINXY(__NR_timerfd_settime,   sys_timerfd_settime),  // 325
   LINXY(__NR_timerfd_gettime,   sys_timerfd_gettime),  // 326
   LINXY(__NR_signalfd4,         sys_signalfd4),        // 327
   LINXY(__NR_eventfd2,          sys_eventfd2),         // 328
   LINXY(__NR_epoll_create1,     sys_epoll_create1),    // 329

   LINXY(__NR_dup3,              sys_dup3),             // 330
   LINXY(__NR_pipe2,             sys_pipe2),            // 331
   LINXY(__NR_inotify_init1,     sys_inotify_init1),    // 332
   LINXY(__NR_preadv,            sys_preadv),           // 333
   LINX_(__NR_pwritev,           sys_pwritev),          // 334

   LINXY(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo),// 335
   LINXY(__NR_perf_event_open,   sys_perf_event_open),  // 336
   LINXY(__NR_recvmmsg,          sys_recvmmsg),         // 337
   LINXY(__NR_fanotify_init,     sys_fanotify_init),    // 338
   LINX_(__NR_fanotify_mark,     sys_fanotify_mark),    // 339

   LINXY(__NR_prlimit64,         sys_prlimit64),        // 340
   LINXY(__NR_name_to_handle_at, sys_name_to_handle_at),// 341
   LINXY(__NR_open_by_handle_at, sys_open_by_handle_at),// 342
   LINXY(__NR_clock_adjtime,     sys_clock_adjtime),    // 343
   LINX_(__NR_syncfs,            sys_syncfs),           // 344

   LINXY(__NR_sendmmsg,          sys_sendmmsg),         // 345
//   LINX_(__NR_setns,             sys_ni_syscall),       // 346
   LINXY(__NR_process_vm_readv,  sys_process_vm_readv), // 347
   LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 348
   LINX_(__NR_kcmp,              sys_kcmp),             // 349

//   LIN__(__NR_finit_module,      sys_ni_syscall),       // 350
//   LIN__(__NR_sched_setattr,     sys_ni_syscall),       // 351
//   LIN__(__NR_sched_getattr,     sys_ni_syscall),       // 352
//   LIN__(__NR_renameat2,         sys_ni_syscall),       // 353
//   LIN__(__NR_seccomp,           sys_ni_syscall),       // 354

   LINXY(__NR_getrandom,         sys_getrandom),        // 355
   LINXY(__NR_memfd_create,      sys_memfd_create)      // 356
//   LIN__(__NR_bpf,               sys_ni_syscall)        // 357
};

SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
{
   const UInt syscall_table_size
      = sizeof(syscall_table) / sizeof(syscall_table[0]);

   /* Is it in the contiguous initial section of the table? */
   if (sysno < syscall_table_size) {
      SyscallTableEntry* sys = &syscall_table[sysno];
      if (sys->before == NULL)
         return NULL; /* no entry */
      else
         return sys;
   }

   /* Can't find a wrapper */
   return NULL;
}
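
/* Illustrative use only: the real caller lives in the generic syscall
   dispatch code and is not reproduced here.  The disabled sketch below is
   an assumption about how a caller is expected to treat the result -- a
   NULL return means "no wrapper known for this syscall number", which
   should be routed to the unhandled-syscall path rather than
   dereferenced. */
#if 0
static void example_dispatch ( UInt sysno )
{
   SyscallTableEntry* ent = ML_(get_linux_syscall_entry)(sysno);
   if (ent == NULL) {
      /* sysno is beyond syscall_table[] or its slot has no PRE wrapper:
         treat the syscall as unhandled. */
      return;
   }
   /* ent->before is the PRE wrapper; a POST wrapper slot (assumed here to
      be ent->after) may be NULL when no post-syscall work is needed. */
}
#endif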

#endif // defined(VGP_x86_linux)

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/