/*
 * Copyright © 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

/* Job queue with execution in separate threads.
 *
 * Jobs can be added from any thread. After a job has been added, waiting on
 * its fence can be used to wait for its completion.
 */
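
/* A minimal end-to-end sketch of the API (illustrative only; "myq", the
 * queue sizes and the my_* names are made up for this example):
 *
 *    struct my_job {
 *       struct util_queue_fence fence;
 *       ...job inputs and outputs...
 *    };
 *
 *    static void my_execute(void *job, void *gdata, int thread_index)
 *    {
 *       struct my_job *j = (struct my_job *)job;
 *       ...do the work; the queue signals j->fence afterwards...
 *    }
 *
 *    struct util_queue queue;
 *    struct my_job job = {0};
 *
 *    util_queue_init(&queue, "myq", 32, 1, 0, NULL);
 *    util_queue_fence_init(&job.fence);
 *    util_queue_add_job(&queue, &job, &job.fence, my_execute, NULL, 0);
 *    util_queue_fence_wait(&job.fence);    blocks until the job executed
 *    util_queue_destroy(&queue);
 */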

#ifndef U_QUEUE_H
#define U_QUEUE_H

#include <string.h>

#include "simple_mtx.h"
#include "util/futex.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/os_time.h"
#include "util/u_atomic.h"
#include "util/u_thread.h"

#ifdef __cplusplus
extern "C" {
#endif

#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY      (1 << 0)
#define UTIL_QUEUE_INIT_RESIZE_IF_FULL            (1 << 1)
#define UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY  (1 << 2)

#if UTIL_FUTEX_SUPPORTED
#define UTIL_QUEUE_FENCE_FUTEX
#else
#define UTIL_QUEUE_FENCE_STANDARD
#endif

#ifdef UTIL_QUEUE_FENCE_FUTEX
/* Job completion fence.
 * Put this into your job structure.
 */
struct util_queue_fence {
   /* The fence can be in one of three states:
    *  0 - signaled
    *  1 - unsignaled
    *  2 - unsignaled, may have waiters
    */
   uint32_t val;
};
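
/* State 2 exists so that the signaling side only has to call futex_wake()
 * when at least one waiter may actually be asleep. A waiter conceptually
 * runs a loop like the following (a sketch of the idea only; see u_queue.c
 * for the real wait implementation):
 *
 *    uint32_t v = p_atomic_read_relaxed(&fence->val);
 *    while (v != 0) {
 *       if (v != 2) {
 *          v = p_atomic_cmpxchg(&fence->val, 1, 2);
 *          if (v == 0)
 *             break;                       signaled in the meantime
 *       }
 *       futex_wait(&fence->val, 2, NULL);  sleep while val == 2
 *       v = p_atomic_read_relaxed(&fence->val);
 *    }
 */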

static inline void
util_queue_fence_init(struct util_queue_fence *fence)
{
   fence->val = 0;
}

static inline void
util_queue_fence_destroy(struct util_queue_fence *fence)
{
   assert(p_atomic_read_relaxed(&fence->val) == 0);
   /* no-op */
}

static inline void
util_queue_fence_signal(struct util_queue_fence *fence)
{
   uint32_t val = p_atomic_xchg(&fence->val, 0);

   assert(val != 0);

   if (val == 2)
      futex_wake(&fence->val, INT_MAX);
}

/**
 * Move \p fence back into unsignalled state.
 *
 * \warning The caller must ensure that no other thread may currently be
 *          waiting (or about to wait) on the fence.
 */
static inline void
util_queue_fence_reset(struct util_queue_fence *fence)
{
#ifdef NDEBUG
   fence->val = 1;
#else
   uint32_t v = p_atomic_xchg(&fence->val, 1);
   assert(v == 0);
#endif
}

static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return p_atomic_read_relaxed(&fence->val) == 0;
}
#endif

#ifdef UTIL_QUEUE_FENCE_STANDARD
/* Job completion fence.
 * Put this into your job structure.
 */
struct util_queue_fence {
   mtx_t mutex;
   cnd_t cond;
   int signalled;
};

void util_queue_fence_init(struct util_queue_fence *fence);
void util_queue_fence_destroy(struct util_queue_fence *fence);
void util_queue_fence_signal(struct util_queue_fence *fence);

/**
 * Move \p fence back into unsignalled state.
 *
 * \warning The caller must ensure that no other thread may currently be
 *          waiting (or about to wait) on the fence.
 */
static inline void
util_queue_fence_reset(struct util_queue_fence *fence)
{
   assert(fence->signalled);
   fence->signalled = 0;
}

static inline bool
util_queue_fence_is_signalled(struct util_queue_fence *fence)
{
   return fence->signalled != 0;
}
#endif

void
_util_queue_fence_wait(struct util_queue_fence *fence);

static inline void
util_queue_fence_wait(struct util_queue_fence *fence)
{
   if (unlikely(!util_queue_fence_is_signalled(fence)))
      _util_queue_fence_wait(fence);
}

bool
_util_queue_fence_wait_timeout(struct util_queue_fence *fence,
                               int64_t abs_timeout);

/**
 * Wait for the fence to be signaled with a timeout.
 *
 * \param fence the fence
 * \param abs_timeout the absolute timeout in nanoseconds, relative to the
 *                    clock provided by os_time_get_nano.
 *
 * \return true if the fence was signaled, false if the timeout occurred.
 */
static inline bool
util_queue_fence_wait_timeout(struct util_queue_fence *fence,
                              int64_t abs_timeout)
{
   if (util_queue_fence_is_signalled(fence))
      return true;

   if (abs_timeout == (int64_t)OS_TIMEOUT_INFINITE) {
      _util_queue_fence_wait(fence);
      return true;
   }

   return _util_queue_fence_wait_timeout(fence, abs_timeout);
}
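
/* Example: wait for at most one millisecond (a sketch; the 1 ms budget is
 * arbitrary). The timeout is absolute, so it is computed by adding the
 * relative budget to the current os_time_get_nano() value:
 *
 *    int64_t abs_timeout = os_time_get_nano() + 1000000;
 *    if (!util_queue_fence_wait_timeout(fence, abs_timeout)) {
 *       ...timed out, the job has not completed yet...
 *    }
 */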

typedef void (*util_queue_execute_func)(void *job, void *gdata, int thread_index);

struct util_queue_job {
   void *job;
   void *global_data;
   size_t job_size;
   struct util_queue_fence *fence;
   util_queue_execute_func execute;
   util_queue_execute_func cleanup;
};

/* Put this into your context. */
struct util_queue {
   char name[14]; /* 13 characters = the thread name without the index */
   mtx_t lock;
   bool create_threads_on_demand;
   cnd_t has_queued_cond;
   cnd_t has_space_cond;
   thrd_t *threads;
   unsigned flags;
   int num_queued;
   unsigned max_threads;
   unsigned num_threads; /* decreasing this number will terminate threads */
   int max_jobs;
   int write_idx, read_idx; /* ring buffer pointers */
   size_t total_jobs_size;  /* memory use of all jobs in the queue */
   struct util_queue_job *jobs;
   void *global_data;

   /* for cleanup at exit(), protected by exit_mutex */
   struct list_head head;
};

bool util_queue_init(struct util_queue *queue,
                     const char *name,
                     unsigned max_jobs,
                     unsigned num_threads,
                     unsigned flags,
                     void *global_data);
void util_queue_destroy(struct util_queue *queue);

/* The optional cleanup callback is called after the fence is signaled. */
void util_queue_add_job(struct util_queue *queue,
                        void *job,
                        struct util_queue_fence *fence,
                        util_queue_execute_func execute,
                        util_queue_execute_func cleanup,
                        const size_t job_size);
void util_queue_drop_job(struct util_queue *queue,
                         struct util_queue_fence *fence);
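
/* Sketch: a fire-and-forget job that releases itself from the cleanup
 * callback (struct my_job, my_execute and free_job are hypothetical names;
 * because nothing ever waits on the fence, the job can safely free its own
 * storage once cleanup runs):
 *
 *    static void free_job(void *job, void *gdata, int thread_index)
 *    {
 *       free(job);
 *    }
 *
 *    struct my_job *job = calloc(1, sizeof(*job));
 *    util_queue_fence_init(&job->fence);
 *    util_queue_add_job(&queue, job, &job->fence, my_execute, free_job,
 *                       sizeof(*job));
 */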

void util_queue_finish(struct util_queue *queue);

/* Adjust the number of active threads. The new number of threads can't be
 * greater than the initial number of threads at the creation of the queue,
 * and it can't be less than 1.
 */
void
util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads,
                              bool locked);

int64_t util_queue_get_thread_time_nano(struct util_queue *queue,
                                        unsigned thread_index);

/* The util_queue structure must be zero-initialized for this to work. */
static inline bool
util_queue_is_initialized(struct util_queue *queue)
{
   return queue->threads != NULL;
}

/* Convenience structure for monitoring the queue externally and for passing
 * it between Mesa components. The queue doesn't use it directly.
 */
struct util_queue_monitoring
{
   /* For querying the thread busyness. */
   struct util_queue *queue;

   /* Counters updated by the user of the queue. */
   unsigned num_offloaded_items;
   unsigned num_direct_items;
   unsigned num_syncs;
   unsigned num_batches;
};

#ifdef __cplusplus
}
#endif

#endif