1 /*
2   This file is part of drd, a thread error detector.
3 
4   Copyright (C) 2006-2013 Bart Van Assche <bvanassche@acm.org>.
5 
6   This program is free software; you can redistribute it and/or
7   modify it under the terms of the GNU General Public License as
8   published by the Free Software Foundation; either version 2 of the
9   License, or (at your option) any later version.
10 
11   This program is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19   02111-1307, USA.
20 
21   The GNU General Public License is contained in the file COPYING.
22 */
23 
24 
25 #include "drd_barrier.h"
26 #include "drd_clientobj.h"
27 #include "drd_error.h"
28 #include "drd_suppression.h"
29 #include "pub_tool_errormgr.h"    // VG_(maybe_record_error)()
30 #include "pub_tool_libcassert.h"  // tl_assert()
31 #include "pub_tool_libcprint.h"   // VG_(printf)()
32 #include "pub_tool_machine.h"     // VG_(get_IP)()
33 #include "pub_tool_mallocfree.h"  // VG_(malloc)(), VG_(free)()
34 #include "pub_tool_oset.h"
35 #include "pub_tool_threadstate.h" // VG_(get_running_tid)()
36 
37 
38 /* Type definitions. */
39 
40 /** Information associated with one thread participating in a barrier. */
41 struct barrier_thread_info
42 {
43    UWord       tid;           // A DrdThreadId declared as UWord because
44                               // this member variable is the key of an OSet.
45    Segment*    sg;            // Segment of the last pthread_barrier() call
46                               // by thread tid.
47    Segment*    post_wait_sg;  // Segment created after *_barrier_wait() finished
48    ExeContext* wait_call_ctxt;// call stack for *_barrier_wait() call.
49    Bool       thread_finished;// Whether thread 'tid' has finished.
50 };
51 
52 
53 /* Local functions. */
54 
55 static void barrier_cleanup(struct barrier_info* p);
56 static void barrier_delete_thread(struct barrier_info* const p,
57                                   const DrdThreadId tid);
58 static const HChar* barrier_get_typename(struct barrier_info* const p);
59 static const HChar* barrier_type_name(const BarrierT bt);
60 static
61 void barrier_report_wait_delete_race(const struct barrier_info* const p,
62                                      const struct barrier_thread_info* const q);
63 
64 
65 /* Local variables. */
66 
67 static Bool  s_trace_barrier = False;
68 static ULong s_barrier_segment_creation_count;
69 
70 
71 /* Function definitions. */
72 
DRD_(barrier_set_trace)73 void DRD_(barrier_set_trace)(const Bool trace_barrier)
74 {
75    s_trace_barrier = trace_barrier;
76 }
77 
78 /**
79  * Initialize the structure *p with the specified thread ID and iteration
80  * information.
81  */
82 static
DRD_(barrier_thread_initialize)83 void DRD_(barrier_thread_initialize)(struct barrier_thread_info* const p,
84                                      const DrdThreadId tid)
85 {
86    p->tid             = tid;
87    p->sg              = NULL;
88    p->post_wait_sg    = 0;
89    p->wait_call_ctxt  = 0;
90    p->thread_finished = False;
91 }
92 
93 /**
94  * Deallocate the memory that is owned by members of
95  * struct barrier_thread_info.
96  */
DRD_(barrier_thread_destroy)97 static void DRD_(barrier_thread_destroy)(struct barrier_thread_info* const p)
98 {
99    tl_assert(p);
100    DRD_(sg_put)(p->sg);
101    DRD_(sg_put)(p->post_wait_sg);
102 }
103 
104 /**
105  * Initialize the structure *p with the specified client-side barrier address,
106  * barrier object size and number of participants in each barrier.
107  */
108 static
DRD_(barrier_initialize)109 void DRD_(barrier_initialize)(struct barrier_info* const p,
110                               const Addr barrier,
111                               const BarrierT barrier_type,
112                               const Word count)
113 {
114    int i;
115 
116    tl_assert(barrier != 0);
117    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
118    tl_assert(p->a1 == barrier);
119 
120    p->cleanup           = (void(*)(DrdClientobj*))barrier_cleanup;
121    p->delete_thread
122       = (void(*)(DrdClientobj*, DrdThreadId))barrier_delete_thread;
123    p->barrier_type      = barrier_type;
124    p->count             = count;
125    p->pre_iteration     = 0;
126    p->post_iteration    = 0;
127    p->pre_waiters_left  = count;
128    p->post_waiters_left = count;
129 
130    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid) == sizeof(Word));
131    tl_assert(sizeof(((struct barrier_thread_info*)0)->tid)
132              >= sizeof(DrdThreadId));
133    for (i = 0; i < 2; i++) {
134       p->oset[i] = VG_(OSetGen_Create)(0, 0, VG_(malloc), "drd.barrier.bi.1",
135                                        VG_(free));
136    }
137 }
138 
139 /**
140  * Deallocate the memory owned by the struct barrier_info object and also
141  * all the nodes in the OSet p->oset.
142  *
143  * Called by clientobj_destroy().
144  */
barrier_cleanup(struct barrier_info * p)145 static void barrier_cleanup(struct barrier_info* p)
146 {
147    struct barrier_thread_info* q;
148    Segment* latest_sg = 0;
149    OSet* oset;
150    int i;
151 
152    tl_assert(p);
153 
154    DRD_(thread_get_latest_segment)(&latest_sg, DRD_(thread_get_running_tid)());
155    tl_assert(latest_sg);
156 
157    if (p->pre_waiters_left != p->count) {
158       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
159       VG_(maybe_record_error)(VG_(get_running_tid)(),
160                               BarrierErr,
161                               VG_(get_IP)(VG_(get_running_tid)()),
162                               "Destruction of barrier that is being waited"
163                               " upon",
164                               &bei);
165    } else {
166       oset = p->oset[1 - (p->pre_iteration & 1)];
167       VG_(OSetGen_ResetIter)(oset);
168       for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
169          if (q->post_wait_sg && !DRD_(vc_lte)(&q->post_wait_sg->vc,
170                                               &latest_sg->vc))
171          {
172             barrier_report_wait_delete_race(p, q);
173          }
174          DRD_(barrier_thread_destroy)(q);
175       }
176    }
177 
178    for (i = 0; i < 2; i++) {
179       VG_(OSetGen_Destroy)(p->oset[i]);
180       p->oset[i] = NULL;
181    }
182 
183    DRD_(sg_put)(latest_sg);
184 }
185 
186 /**
187  * Look up the client-side barrier address barrier in s_barrier[]. If not
188  * found, add it.
189  */
190 static
191 struct barrier_info*
DRD_(barrier_get_or_allocate)192 DRD_(barrier_get_or_allocate)(const Addr barrier,
193                               const BarrierT barrier_type, const Word count)
194 {
195    struct barrier_info *p;
196 
197    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
198 
199    tl_assert(offsetof(DrdClientobj, barrier) == 0);
200    p = &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
201    if (p == 0)
202    {
203       p = &(DRD_(clientobj_add)(barrier, ClientBarrier)->barrier);
204       DRD_(barrier_initialize)(p, barrier, barrier_type, count);
205    }
206    return p;
207 }
208 
209 /**
210  * Look up the address of the struct barrier_info associated with the
211  * client-side barrier object.
212  */
DRD_(barrier_get)213 static struct barrier_info* DRD_(barrier_get)(const Addr barrier)
214 {
215    tl_assert(offsetof(DrdClientobj, barrier) == 0);
216    return &(DRD_(clientobj_get)(barrier, ClientBarrier)->barrier);
217 }
218 
219 /**
220  * Initialize a barrier with given client address, barrier type and number of
221  * participants. The 'reinitialization' argument indicates whether a barrier
222  * object is being initialized or reinitialized.
223  *
224  * Called before pthread_barrier_init().
225  */
DRD_(barrier_init)226 void DRD_(barrier_init)(const Addr barrier,
227                         const BarrierT barrier_type, const Word count,
228                         const Bool reinitialization)
229 {
230    struct barrier_info* p;
231 
232    tl_assert(barrier_type == pthread_barrier || barrier_type == gomp_barrier);
233 
234    if (count == 0)
235    {
236       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
237       VG_(maybe_record_error)(VG_(get_running_tid)(),
238                               BarrierErr,
239                               VG_(get_IP)(VG_(get_running_tid)()),
240                               "pthread_barrier_init: 'count' argument is zero",
241                               &bei);
242    }
243 
244    if (! reinitialization && barrier_type == pthread_barrier)
245    {
246       p = DRD_(barrier_get)(barrier);
247       if (p)
248       {
249          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), barrier, 0, 0 };
250          VG_(maybe_record_error)(VG_(get_running_tid)(),
251                                  BarrierErr,
252                                  VG_(get_IP)(VG_(get_running_tid)()),
253                                  "Barrier reinitialization",
254                                  &bei);
255       }
256    }
257 
258    p = DRD_(barrier_get_or_allocate)(barrier, barrier_type, count);
259 
260    if (s_trace_barrier) {
261       if (reinitialization)
262          DRD_(trace_msg)("[%d] barrier_reinit    %s 0x%lx count %ld -> %ld",
263                          DRD_(thread_get_running_tid)(),
264                          barrier_get_typename(p), barrier, p->count, count);
265       else
266          DRD_(trace_msg)("[%d] barrier_init      %s 0x%lx",
267                          DRD_(thread_get_running_tid)(),
268                          barrier_get_typename(p),
269                          barrier);
270    }
271 
272    if (reinitialization && p->count != count)
273    {
274       if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
275       {
276          BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
277          VG_(maybe_record_error)(VG_(get_running_tid)(),
278                                  BarrierErr,
279                                  VG_(get_IP)(VG_(get_running_tid)()),
280                                  "Reinitialization of barrier with active"
281                                  " waiters",
282                                  &bei);
283       }
284       p->count = count;
285    }
286 }
287 
288 /** Called after pthread_barrier_destroy() / gomp_barrier_destroy(). */
DRD_(barrier_destroy)289 void DRD_(barrier_destroy)(const Addr barrier, const BarrierT barrier_type)
290 {
291    struct barrier_info* p;
292 
293    p = DRD_(barrier_get)(barrier);
294 
295    if (s_trace_barrier)
296       DRD_(trace_msg)("[%d] barrier_destroy   %s 0x%lx",
297                       DRD_(thread_get_running_tid)(),
298                       barrier_get_typename(p), barrier);
299 
300    if (p == 0)
301    {
302       GenericErrInfo GEI = {
303 	 .tid = DRD_(thread_get_running_tid)(),
304 	 .addr = barrier,
305       };
306       VG_(maybe_record_error)(VG_(get_running_tid)(),
307                               GenericErr,
308                               VG_(get_IP)(VG_(get_running_tid)()),
309                               "Not a barrier",
310                               &GEI);
311       return;
312    }
313 
314    if (p->pre_waiters_left != p->count || p->post_waiters_left != p->count)
315    {
316       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
317       VG_(maybe_record_error)(VG_(get_running_tid)(),
318                               BarrierErr,
319                               VG_(get_IP)(VG_(get_running_tid)()),
320                               "Destruction of a barrier with active waiters",
321                               &bei);
322    }
323 
324    DRD_(clientobj_remove)(p->a1, ClientBarrier);
325 }
326 
327 /** Called before pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_pre_wait)328 void DRD_(barrier_pre_wait)(const DrdThreadId tid, const Addr barrier,
329                             const BarrierT barrier_type)
330 {
331    struct barrier_info* p;
332    struct barrier_thread_info* q;
333    const UWord word_tid = tid;
334    OSet* oset;
335 
336    p = DRD_(barrier_get)(barrier);
337    if (p == 0 && barrier_type == gomp_barrier) {
338       /*
339        * gomp_barrier_wait() call has been intercepted but gomp_barrier_init()
340        * not. The only cause I know of that can trigger this is that libgomp.so
341        * has been compiled with --enable-linux-futex.
342        */
343       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), 0, 0, 0 };
344       VG_(maybe_record_error)(VG_(get_running_tid)(),
345                               BarrierErr,
346                               VG_(get_IP)(VG_(get_running_tid)()),
347                               "Please verify whether gcc has been configured"
348                               " with option --disable-linux-futex. See also"
349                               " the section about OpenMP in the DRD manual.",
350                               &bei);
351    }
352    tl_assert(p);
353 
354    if (s_trace_barrier)
355       DRD_(trace_msg)("[%d] barrier_pre_wait  %s 0x%lx iteration %ld",
356                       DRD_(thread_get_running_tid)(),
357                       barrier_get_typename(p), barrier, p->pre_iteration);
358 
359    /* Clean up nodes associated with finished threads. */
360    oset = p->oset[p->pre_iteration & 1];
361    tl_assert(oset);
362    VG_(OSetGen_ResetIter)(oset);
363    for ( ; (q = VG_(OSetGen_Next)(oset)) != 0; ) {
364       if (q->thread_finished) {
365          void* r = VG_(OSetGen_Remove)(oset, &q->tid);
366          tl_assert(r == q);
367          DRD_(barrier_thread_destroy)(q);
368          VG_(OSetGen_FreeNode)(oset, q);
369          VG_(OSetGen_ResetIterAt)(oset, &word_tid);
370       }
371    }
372    /* Allocate the per-thread data structure if necessary. */
373    q = VG_(OSetGen_Lookup)(oset, &word_tid);
374    if (q == NULL) {
375       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
376       DRD_(barrier_thread_initialize)(q, tid);
377       VG_(OSetGen_Insert)(oset, q);
378       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
379    }
380 
381    /* Record *_barrier_wait() call context. */
382    q->wait_call_ctxt = VG_(record_ExeContext)(VG_(get_running_tid)(), 0);
383 
384    /*
385     * Store a pointer to the latest segment of the current thread in the
386     * per-thread data structure.
387     */
388    DRD_(thread_get_latest_segment)(&q->sg, tid);
389 
390    /*
391     * If the same number of threads as the barrier count indicates have
392     * called the pre *_barrier_wait() wrapper, toggle p->pre_iteration and
393     * reset the p->pre_waiters_left counter.
394     */
395    if (--p->pre_waiters_left <= 0)
396    {
397       p->pre_iteration++;
398       p->pre_waiters_left = p->count;
399    }
400 }
401 
402 /** Called after pthread_barrier_wait() / gomp_barrier_wait(). */
DRD_(barrier_post_wait)403 void DRD_(barrier_post_wait)(const DrdThreadId tid, const Addr barrier,
404                              const BarrierT barrier_type, const Bool waited,
405                              const Bool serializing)
406 {
407    struct barrier_info* p;
408    const UWord word_tid = tid;
409    struct barrier_thread_info* q;
410    struct barrier_thread_info* r;
411    OSet* oset;
412 
413    p = DRD_(barrier_get)(barrier);
414 
415    if (s_trace_barrier)
416       DRD_(trace_msg)("[%d] barrier_post_wait %s 0x%lx iteration %ld%s",
417                       tid, p ? barrier_get_typename(p) : "(?)",
418                       barrier, p ? p->post_iteration : -1,
419                       serializing ? " (serializing)" : "");
420 
421    /*
422     * If p == 0, this means that the barrier has been destroyed after
423     * *_barrier_wait() returned and before this function was called. Just
424     * return in that case -- race conditions between *_barrier_wait()
425     * and *_barrier_destroy() are detected by the *_barrier_destroy() wrapper.
426     */
427    if (p == 0)
428       return;
429 
430    /* If the *_barrier_wait() call returned an error code, exit. */
431    if (! waited)
432       return;
433 
434    oset = p->oset[p->post_iteration & 1];
435    q = VG_(OSetGen_Lookup)(oset, &word_tid);
436    if (p->pre_iteration - p->post_iteration > 1) {
437       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
438       VG_(maybe_record_error)(VG_(get_running_tid)(),
439                               BarrierErr,
440                               VG_(get_IP)(VG_(get_running_tid)()),
441                               "Number of concurrent pthread_barrier_wait()"
442                               " calls exceeds the barrier count",
443                               &bei);
444    } else if (q == NULL) {
445       BarrierErrInfo bei = { DRD_(thread_get_running_tid)(), p->a1, 0, 0 };
446       VG_(maybe_record_error)(VG_(get_running_tid)(),
447                               BarrierErr,
448                               VG_(get_IP)(VG_(get_running_tid)()),
449                               "Error in barrier implementation"
450                               " -- barrier_wait() started before"
451                               " barrier_destroy() and finished after"
452                               " barrier_destroy()",
453                               &bei);
454    }
455    if (q == NULL) {
456       q = VG_(OSetGen_AllocNode)(oset, sizeof(*q));
457       DRD_(barrier_thread_initialize)(q, tid);
458       VG_(OSetGen_Insert)(oset, q);
459       tl_assert(VG_(OSetGen_Lookup)(oset, &word_tid) == q);
460       DRD_(thread_get_latest_segment)(&q->sg, tid);
461    }
462 
463    /* Create a new segment and store a pointer to that segment. */
464    DRD_(thread_new_segment)(tid);
465    DRD_(thread_get_latest_segment)(&q->post_wait_sg, tid);
466    s_barrier_segment_creation_count++;
467 
468    /*
469     * Combine all vector clocks that were stored in the pre_barrier_wait
470     * wrapper with the vector clock of the current thread.
471     */
472    {
473       VectorClock old_vc;
474 
475       DRD_(vc_copy)(&old_vc, DRD_(thread_get_vc)(tid));
476       VG_(OSetGen_ResetIter)(oset);
477       for ( ; (r = VG_(OSetGen_Next)(oset)) != 0; )
478       {
479          if (r != q)
480          {
481             tl_assert(r->sg);
482             DRD_(vc_combine)(DRD_(thread_get_vc)(tid), &r->sg->vc);
483          }
484       }
485       DRD_(thread_update_conflict_set)(tid, &old_vc);
486       DRD_(vc_cleanup)(&old_vc);
487    }
488 
489    /*
490     * If the same number of threads as the barrier count indicates have
491     * called the post *_barrier_wait() wrapper, toggle p->post_iteration and
492     * reset the p->post_waiters_left counter.
493     */
494    if (--p->post_waiters_left <= 0)
495    {
496       p->post_iteration++;
497       p->post_waiters_left = p->count;
498    }
499 }
500 
501 /** Called when thread tid stops to exist. */
barrier_delete_thread(struct barrier_info * const p,const DrdThreadId tid)502 static void barrier_delete_thread(struct barrier_info* const p,
503                                   const DrdThreadId tid)
504 {
505    struct barrier_thread_info* q;
506    const UWord word_tid = tid;
507    int i;
508 
509    for (i = 0; i < 2; i++) {
510       q = VG_(OSetGen_Lookup)(p->oset[i], &word_tid);
511       if (q)
512          q->thread_finished = True;
513    }
514 }
515 
516 /**
517  * Report that *_barrier_destroy() has been called but that this call was
518  * not synchronized with the last *_barrier_wait() call on the same barrier.
519  *
520  * This topic has been discussed extensively on comp.programming.threads
521  * (February 3, 2009). See also
522  * <a href="http://groups.google.com/group/comp.programming.threads/browse_thread/thread/4f65535d6192aa50/a5f4bf1e3b437c4d">Immediately destroying pthread barriers</a>.
523  */
524 static
barrier_report_wait_delete_race(const struct barrier_info * const p,const struct barrier_thread_info * const q)525 void barrier_report_wait_delete_race(const struct barrier_info* const p,
526                                      const struct barrier_thread_info* const q)
527 {
528    tl_assert(p);
529    tl_assert(q);
530 
531    {
532       BarrierErrInfo bei
533          = { DRD_(thread_get_running_tid)(), p->a1, q->tid, q->wait_call_ctxt };
534       VG_(maybe_record_error)(VG_(get_running_tid)(),
535                               BarrierErr,
536                               VG_(get_IP)(VG_(get_running_tid)()),
537                               "Destruction of barrier not synchronized with"
538                               " barrier wait call",
539                               &bei);
540    }
541 }
542 
barrier_get_typename(struct barrier_info * const p)543 static const HChar* barrier_get_typename(struct barrier_info* const p)
544 {
545    tl_assert(p);
546 
547    return barrier_type_name(p->barrier_type);
548 }
549 
barrier_type_name(const BarrierT bt)550 static const HChar* barrier_type_name(const BarrierT bt)
551 {
552    switch (bt)
553    {
554    case pthread_barrier:
555       return "pthread barrier";
556    case gomp_barrier:
557       return "gomp barrier";
558    }
559    return "?";
560 }
561 
DRD_(get_barrier_segment_creation_count)562 ULong DRD_(get_barrier_segment_creation_count)(void)
563 {
564    return s_barrier_segment_creation_count;
565 }
566