1 //===------- state-queuei.h - OpenMP GPU State Queue ------------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the implementation of a queue to hand out OpenMP state
10 // objects to teams of one or more kernels.
11 //
12 // Reference:
13 // Thomas R.W. Scogland and Wu-chun Feng. 2015.
14 // Design and Evaluation of Scalable Concurrent Queues for Many-Core
15 // Architectures. International Conference on Performance Engineering.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "state-queue.h"
20 #include "common/target_atomic.h"
21 
// Draws the ticket for one enqueue operation by atomically adding 1 to `tail`.
//
// Returns whatever __kmpc_atomic_add yields for that update.
// NOTE(review): presumably this is the counter value *before* the increment
// (as with CUDA atomicAdd) -- confirm in common/target_atomic.h.
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::ENQUEUE_TICKET() {
  unsigned int *tailCounter = (unsigned int *)&tail;
  return __kmpc_atomic_add(tailCounter, 1u);
}
26 
27 template <typename ElementType, uint32_t SIZE>
DEQUEUE_TICKET()28 INLINE uint32_t omptarget_nvptx_Queue<ElementType, SIZE>::DEQUEUE_TICKET() {
29   return __kmpc_atomic_add((unsigned int *)&head, 1u);
30 }
31 
// Maps a ticket to the base id of its pass over the ring: the number of whole
// SIZE-sized rounds contained in `ticket`, doubled, so the result is always
// even. Dequeue() waits for this even id; Enqueue() waits for this id plus one.
//
// NOTE(review): the value is not reduced modulo MAX_ID here, whereas
// DoneServing() stores ids modulo MAX_ID -- confirm that ticket counts stay
// small enough for the two to keep matching.
template <typename ElementType, uint32_t SIZE>
INLINE uint32_t
omptarget_nvptx_Queue<ElementType, SIZE>::ID(uint32_t ticket) {
  const uint32_t completedRounds = ticket / SIZE;
  return completedRounds * 2;
}
37 
// Reports whether `ids[slot]` currently equals `id`.
//
// The slot's id is read through an atomic add of zero instead of a plain
// load, so the read goes through the same atomic path as the updates made by
// DoneServing().
template <typename ElementType, uint32_t SIZE>
INLINE bool omptarget_nvptx_Queue<ElementType, SIZE>::IsServing(uint32_t slot,
                                                                uint32_t id) {
  unsigned int *slotId = (unsigned int *)&ids[slot];
  return id == __kmpc_atomic_add(slotId, 0u);
}
43 
// Stores `element` into `elementQueue[slot]` with an atomic exchange.
//
// The pointer is moved as a 64-bit integer (unsigned long long); the value the
// exchange hands back is discarded.
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::PushElement(uint32_t slot,
                                                      ElementType *element) {
  unsigned long long *cell = (unsigned long long *)&elementQueue[slot];
  unsigned long long elementBits = (unsigned long long)element;
  __kmpc_atomic_exchange(cell, elementBits);
}
51 
// Reads the element pointer held in `elementQueue[slot]`.
//
// The read is an atomic add of zero on the slot viewed as a 64-bit integer,
// so the slot's contents are left unchanged; the result is cast back to an
// element pointer.
template <typename ElementType, uint32_t SIZE>
INLINE ElementType *
omptarget_nvptx_Queue<ElementType, SIZE>::PopElement(uint32_t slot) {
  unsigned long long *cell = (unsigned long long *)&elementQueue[slot];
  return (ElementType *)__kmpc_atomic_add(cell, 0ull);
}
58 
59 template <typename ElementType, uint32_t SIZE>
DoneServing(uint32_t slot,uint32_t id)60 INLINE void omptarget_nvptx_Queue<ElementType, SIZE>::DoneServing(uint32_t slot,
61                                                                   uint32_t id) {
62   __kmpc_atomic_exchange((unsigned int *)&ids[slot], (id + 1) % MAX_ID);
63 }
64 
// Puts `element` back into the queue.
//
// Steps:
//   1. Take an enqueue ticket; the ticket modulo SIZE picks the slot.
//   2. Busy-wait until that slot's id equals ID(ticket) + 1, i.e. the odd id
//      that follows the even id Dequeue() waits for on the same round.
//   3. Store the element pointer and advance the slot's id.
//
// The wait is an unbounded spin; it only ends once some other caller's
// DoneServing() moves the slot to the awaited id.
template <typename ElementType, uint32_t SIZE>
INLINE void
omptarget_nvptx_Queue<ElementType, SIZE>::Enqueue(ElementType *element) {
  const uint32_t myTicket = ENQUEUE_TICKET();
  const uint32_t mySlot = myTicket % SIZE;
  const uint32_t myTurn = ID(myTicket) + 1;

  while (!IsServing(mySlot, myTurn)) {
    // Spin until it is this ticket's turn on the slot.
  }

  PushElement(mySlot, element);
  DoneServing(mySlot, myTurn);
}
76 
77 template <typename ElementType, uint32_t SIZE>
Dequeue()78 INLINE ElementType *omptarget_nvptx_Queue<ElementType, SIZE>::Dequeue() {
79   uint32_t ticket = DEQUEUE_TICKET();
80   uint32_t slot = ticket % SIZE;
81   uint32_t id = ID(ticket);
82   while (!IsServing(slot, id))
83     ;
84   ElementType *element = PopElement(slot);
85   // This is to populate the queue because of the lack of GPU constructors.
86   if (element == 0)
87     element = &elements[slot];
88   DoneServing(slot, id);
89   return element;
90 }
91