1 
2 /*--------------------------------------------------------------------*/
3 /*--- Linux ticket lock implementation         ticket-lock-linux.c ---*/
4 /*---                                                              ---*/
5 /*--- Guarantees fair scheduling even if multiple threads are      ---*/
6 /*--- runnable at the same time on a multicore system. Has been    ---*/
7 /*--- observed to cause a slow-down compared to the generic        ---*/
8 /*--- scheduler lock with CPU frequency scaling enabled. Makes     ---*/
9 /*--- Valgrind slightly faster if CPU frequency scaling has been   ---*/
10 /*--- disabled. See also http://bugs.kde.org/show_bug.cgi?id=270006---*/
11 /*--------------------------------------------------------------------*/
12 
13 /*
14    This file is part of Valgrind, a dynamic binary instrumentation
15    framework.
16 
17    Copyright (C) 2011-2017 Bart Van Assche <bvanassche@acm.org>.
18 
19    This program is free software; you can redistribute it and/or
20    modify it under the terms of the GNU General Public License as
21    published by the Free Software Foundation; either version 2 of the
22    License, or (at your option) any later version.
23 
24    This program is distributed in the hope that it will be useful, but
25    WITHOUT ANY WARRANTY; without even the implied warranty of
26    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27    General Public License for more details.
28 
29    You should have received a copy of the GNU General Public License
30    along with this program; if not, write to the Free Software
31    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
32    02111-1307, USA.
33 
34    The GNU General Public License is contained in the file COPYING.
35 */
36 
37 #include "pub_core_basics.h"
38 #include "pub_core_libcassert.h"
39 #include "pub_core_libcbase.h"     // VG_(memset)()
40 #include "pub_core_libcprint.h"
41 #include "pub_core_syscall.h"
42 #include "pub_core_vki.h"
43 #include "pub_core_vkiscnums.h"    // __NR_futex
44 #include "pub_core_libcproc.h"
45 #include "pub_core_mallocfree.h"
46 #include "pub_core_threadstate.h"
47 #include "pub_core_inner.h"
48 #if defined(ENABLE_INNER_CLIENT_REQUEST)
49 #include "helgrind/helgrind.h"
50 #endif
51 #include "priv_sched-lock.h"
52 #include "priv_sched-lock-impl.h"
53 
54 #define TL_FUTEX_COUNT_LOG2 4
55 #define TL_FUTEX_COUNT (1U << TL_FUTEX_COUNT_LOG2)
56 #define TL_FUTEX_MASK (TL_FUTEX_COUNT - 1)
57 
58 struct sched_lock {
59    volatile unsigned head;
60    volatile unsigned tail;
61    volatile unsigned futex[TL_FUTEX_COUNT];
62    int owner;
63 };
64 
65 #if 1
66 static Bool s_debug;
67 #else
68 static Bool s_debug = True;
69 #endif
70 
71 static const HChar *get_sched_lock_name(void)
72 {
73    return "ticket lock";
74 }
75 
76 static struct sched_lock *create_sched_lock(void)
77 {
78    struct sched_lock *p;
79 
80    p = VG_(malloc)("sched_lock", sizeof(*p));
81 
82    // The futex syscall requires that a futex takes four bytes.
83    vg_assert(sizeof(p->futex[0]) == 4);
84 
85    VG_(memset)(p, 0, sizeof(*p));
86 
87    INNER_REQUEST(ANNOTATE_RWLOCK_CREATE(p));
88    INNER_REQUEST(ANNOTATE_BENIGN_RACE_SIZED(&p->futex, sizeof(p->futex), ""));
89    return p;
90 }
91 
92 static void destroy_sched_lock(struct sched_lock *p)
93 {
94    INNER_REQUEST(ANNOTATE_RWLOCK_DESTROY(p));
95    VG_(free)(p);
96 }
97 
98 static int get_sched_lock_owner(struct sched_lock *p)
99 {
100    return p->owner;
101 }
102 
103 /*
104  * Acquire ticket lock. Increment the tail of the queue and use the original
105  * value as the ticket value. Wait until the head of the queue equals the
106  * ticket value. The futex used to wait depends on the ticket value in order
107  * to avoid that all threads get woken up every time a ticket lock is
108  * released. That last effect is sometimes called the "thundering herd"
109  * effect.
110  *
111  * See also Nick Piggin, x86: FIFO ticket spinlocks, Linux kernel mailing list
112  * (http://lkml.org/lkml/2007/11/1/125) for more info.
113  */
114 static void acquire_sched_lock(struct sched_lock *p)
115 {
116    unsigned ticket, futex_value;
117    volatile unsigned *futex;
118    SysRes sres;
119 
120    ticket = __sync_fetch_and_add(&p->tail, 1);
121    futex = &p->futex[ticket & TL_FUTEX_MASK];
122    if (s_debug)
123       VG_(printf)("[%d/%d] acquire: ticket %u\n", VG_(getpid)(),
124                   VG_(gettid)(), ticket);
125    for (;;) {
126       futex_value = *futex;
127       __sync_synchronize();
128       if (ticket == p->head)
129          break;
130       if (s_debug)
131          VG_(printf)("[%d/%d] acquire: ticket %u - waiting until"
132                      " futex[%ld] != %u\n", VG_(getpid)(),
133                      VG_(gettid)(), ticket, (long)(futex - p->futex),
134                      futex_value);
135       sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
136                               VKI_FUTEX_WAIT | VKI_FUTEX_PRIVATE_FLAG,
137                               futex_value);
138       if (sr_isError(sres) && sr_Err(sres) != VKI_EAGAIN) {
139          VG_(printf)("futex_wait() returned error code %lu\n", sr_Err(sres));
140          vg_assert(False);
141       }
142    }
143    __sync_synchronize();
144    INNER_REQUEST(ANNOTATE_RWLOCK_ACQUIRED(p, /*is_w*/1));
145    vg_assert(p->owner == 0);
146    p->owner = VG_(gettid)();
147 }
148 
149 /*
150  * Release a ticket lock by incrementing the head of the queue. Only generate
151  * a thread wakeup signal if at least one thread is waiting. If the queue tail
152  * matches the wakeup_ticket value, no threads have to be woken up.
153  *
154  * Note: tail will only be read after head has been incremented since both are
155  * declared as volatile and since the __sync...() functions imply a memory
156  * barrier.
157  */
158 static void release_sched_lock(struct sched_lock *p)
159 {
160    unsigned wakeup_ticket, futex_value;
161    volatile unsigned *futex;
162    SysRes sres;
163 
164    vg_assert(p->owner != 0);
165    p->owner = 0;
166    INNER_REQUEST(ANNOTATE_RWLOCK_RELEASED(p, /*is_w*/1));
167    wakeup_ticket = __sync_fetch_and_add(&p->head, 1) + 1;
168    if (p->tail != wakeup_ticket) {
169       futex = &p->futex[wakeup_ticket & TL_FUTEX_MASK];
170       futex_value = __sync_fetch_and_add(futex, 1);
171       if (s_debug)
172          VG_(printf)("[%d/%d] release: waking up ticket %u (futex[%ld] = %u)"
173                      "\n", VG_(getpid)(), VG_(gettid)(), wakeup_ticket,
174                      (long)(futex - p->futex), futex_value);
175       sres = VG_(do_syscall3)(__NR_futex, (UWord)futex,
176                               VKI_FUTEX_WAKE | VKI_FUTEX_PRIVATE_FLAG,
177                               0x7fffffff);
178       vg_assert(!sr_isError(sres));
179    } else {
180       if (s_debug)
181          VG_(printf)("[%d/%d] release: no thread is waiting for ticket %u\n",
182                      VG_(getpid)(), VG_(gettid)(), wakeup_ticket);
183    }
184 }
185 
186 const struct sched_lock_ops ML_(linux_ticket_lock_ops) = {
187    .get_sched_lock_name  = get_sched_lock_name,
188    .create_sched_lock    = create_sched_lock,
189    .destroy_sched_lock   = destroy_sched_lock,
190    .get_sched_lock_owner = get_sched_lock_owner,
191    .acquire_sched_lock   = acquire_sched_lock,
192    .release_sched_lock   = release_sched_lock,
193 };
194