/*
 * Copyright © 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
26 
/* Job queue with execution in a separate thread.
 *
 * Jobs can be added from any thread. After a job has been added, the fence
 * wait call (util_queue_fence_wait) can be used to wait for its completion.
 */
32 
33 #ifndef U_QUEUE_H
34 #define U_QUEUE_H
35 
36 #include <string.h>
37 
38 #include "util/futex.h"
39 #include "util/list.h"
40 #include "util/macros.h"
41 #include "util/os_time.h"
42 #include "util/u_atomic.h"
43 #include "util/u_thread.h"
44 
45 #ifdef __cplusplus
46 extern "C" {
47 #endif
48 
/* Bit flags carrying the UTIL_QUEUE_INIT_ prefix; presumably they are OR'ed
 * together and passed as the "flags" argument of util_queue_init() (confirm
 * against the implementation). Each flag occupies its own bit.
 */
#define UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY      (1 << 0)
#define UTIL_QUEUE_INIT_RESIZE_IF_FULL            (1 << 1)
#define UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY  (1 << 2)
52 
/* Select exactly one fence implementation: the futex-based fence when
 * UTIL_FUTEX_SUPPORTED evaluates to non-zero, otherwise the fence built on a
 * mutex and a condition variable.
 */
#if UTIL_FUTEX_SUPPORTED
#define UTIL_QUEUE_FENCE_FUTEX
#else
#define UTIL_QUEUE_FENCE_STANDARD
#endif
58 
59 #ifdef UTIL_QUEUE_FENCE_FUTEX
/* Job completion fence (futex-based variant).
 * Put this into your job structure.
 */
struct util_queue_fence {
   /* The fence can be in one of three states:
    *  0 - signaled
    *  1 - unsignaled
    *  2 - unsignaled, may have waiters
    */
   uint32_t val;
};
71 
72 static inline void
util_queue_fence_init(struct util_queue_fence * fence)73 util_queue_fence_init(struct util_queue_fence *fence)
74 {
75    fence->val = 0;
76 }
77 
78 static inline void
util_queue_fence_destroy(struct util_queue_fence * fence)79 util_queue_fence_destroy(struct util_queue_fence *fence)
80 {
81    assert(fence->val == 0);
82    /* no-op */
83 }
84 
85 static inline void
util_queue_fence_signal(struct util_queue_fence * fence)86 util_queue_fence_signal(struct util_queue_fence *fence)
87 {
88    uint32_t val = p_atomic_xchg(&fence->val, 0);
89 
90    assert(val != 0);
91 
92    if (val == 2)
93       futex_wake(&fence->val, INT_MAX);
94 }
95 
96 /**
97  * Move \p fence back into unsignalled state.
98  *
99  * \warning The caller must ensure that no other thread may currently be
100  *          waiting (or about to wait) on the fence.
101  */
102 static inline void
util_queue_fence_reset(struct util_queue_fence * fence)103 util_queue_fence_reset(struct util_queue_fence *fence)
104 {
105 #ifdef NDEBUG
106    fence->val = 1;
107 #else
108    uint32_t v = p_atomic_xchg(&fence->val, 1);
109    assert(v == 0);
110 #endif
111 }
112 
113 static inline bool
util_queue_fence_is_signalled(struct util_queue_fence * fence)114 util_queue_fence_is_signalled(struct util_queue_fence *fence)
115 {
116    return fence->val == 0;
117 }
118 #endif
119 
120 #ifdef UTIL_QUEUE_FENCE_STANDARD
/* Job completion fence (mutex + condition variable variant).
 * Put this into your job structure.
 */
struct util_queue_fence {
   /* Presumably used together by the out-of-line signal/wait functions to
    * guard and announce changes of "signalled" — confirm in the .c file.
    */
   mtx_t mutex;
   cnd_t cond;
   int signalled; /* non-zero = signaled, zero = unsignaled */
};
129 
/* With the mutex/condition-variable fence these three operations are only
 * declared here; their definitions live outside this header.
 */
void util_queue_fence_init(struct util_queue_fence *fence);
void util_queue_fence_destroy(struct util_queue_fence *fence);
void util_queue_fence_signal(struct util_queue_fence *fence);
133 
134 /**
135  * Move \p fence back into unsignalled state.
136  *
137  * \warning The caller must ensure that no other thread may currently be
138  *          waiting (or about to wait) on the fence.
139  */
140 static inline void
util_queue_fence_reset(struct util_queue_fence * fence)141 util_queue_fence_reset(struct util_queue_fence *fence)
142 {
143    assert(fence->signalled);
144    fence->signalled = 0;
145 }
146 
147 static inline bool
util_queue_fence_is_signalled(struct util_queue_fence * fence)148 util_queue_fence_is_signalled(struct util_queue_fence *fence)
149 {
150    return fence->signalled != 0;
151 }
152 #endif
153 
/* Out-of-line wait path; util_queue_fence_wait() calls it only when the
 * fence is not already signaled. Defined outside this header.
 */
void
_util_queue_fence_wait(struct util_queue_fence *fence);
156 
/**
 * Wait for \p fence to be signaled.
 *
 * An already-signaled fence is detected inline; only the unsignaled case
 * (hinted as unlikely) goes through the out-of-line wait.
 */
static inline void
util_queue_fence_wait(struct util_queue_fence *fence)
{
   if (unlikely(!util_queue_fence_is_signalled(fence)))
      _util_queue_fence_wait(fence);
}
163 
/* Out-of-line timed wait path; util_queue_fence_wait_timeout() calls it for
 * an unsignaled fence with a finite timeout. Defined outside this header.
 */
bool
_util_queue_fence_wait_timeout(struct util_queue_fence *fence,
                               int64_t abs_timeout);
167 
168 /**
169  * Wait for the fence to be signaled with a timeout.
170  *
171  * \param fence the fence
172  * \param abs_timeout the absolute timeout in nanoseconds, relative to the
173  *                    clock provided by os_time_get_nano.
174  *
175  * \return true if the fence was signaled, false if the timeout occurred.
176  */
177 static inline bool
util_queue_fence_wait_timeout(struct util_queue_fence * fence,int64_t abs_timeout)178 util_queue_fence_wait_timeout(struct util_queue_fence *fence,
179                               int64_t abs_timeout)
180 {
181    if (util_queue_fence_is_signalled(fence))
182       return true;
183 
184    if (abs_timeout == (int64_t)OS_TIMEOUT_INFINITE) {
185       _util_queue_fence_wait(fence);
186       return true;
187    }
188 
189    return _util_queue_fence_wait_timeout(fence, abs_timeout);
190 }
191 
/* Callback type used for both the "execute" and the "cleanup" function of a
 * job. It receives the job pointer and a thread index.
 */
typedef void (*util_queue_execute_func)(void *job, int thread_index);
193 
194 struct util_queue_job {
195    void *job;
196    size_t job_size;
197    struct util_queue_fence *fence;
198    util_queue_execute_func execute;
199    util_queue_execute_func cleanup;
200 };
201 
202 /* Put this into your context. */
203 struct util_queue {
204    char name[14]; /* 13 characters = the thread name without the index */
205    mtx_t finish_lock; /* for util_queue_finish and protects threads/num_threads */
206    mtx_t lock;
207    cnd_t has_queued_cond;
208    cnd_t has_space_cond;
209    thrd_t *threads;
210    unsigned flags;
211    int num_queued;
212    unsigned max_threads;
213    unsigned num_threads; /* decreasing this number will terminate threads */
214    int max_jobs;
215    int write_idx, read_idx; /* ring buffer pointers */
216    size_t total_jobs_size;  /* memory use of all jobs in the queue */
217    struct util_queue_job *jobs;
218 
219    /* for cleanup at exit(), protected by exit_mutex */
220    struct list_head head;
221 };
222 
/* Initialize a queue. Defined outside this header; the bool result presumably
 * reports success — confirm against the implementation.
 */
bool util_queue_init(struct util_queue *queue,
                     const char *name,
                     unsigned max_jobs,
                     unsigned num_threads,
                     unsigned flags);
/* Counterpart of util_queue_init(); defined outside this header. */
void util_queue_destroy(struct util_queue *queue);
229 
230 /* optional cleanup callback is called after fence is signaled: */
231 void util_queue_add_job(struct util_queue *queue,
232                         void *job,
233                         struct util_queue_fence *fence,
234                         util_queue_execute_func execute,
235                         util_queue_execute_func cleanup,
236                         const size_t job_size);
/* Presumably removes the job identified by \p fence from the queue — confirm
 * against the implementation, which lives outside this header.
 */
void util_queue_drop_job(struct util_queue *queue,
                         struct util_queue_fence *fence);
239 
/* Defined outside this header; util_queue::finish_lock is documented as being
 * for this function. Presumably waits until the queued jobs have completed —
 * confirm against the implementation.
 */
void util_queue_finish(struct util_queue *queue);
241 
/* Adjust the number of active threads. The new number of threads can't be
 * greater than the initial number of threads at the creation of the queue,
 * and it can't be less than 1.
 */
void
util_queue_adjust_num_threads(struct util_queue *queue, unsigned num_threads);
248 
/* Defined outside this header. Judging by the name, returns a time value in
 * nanoseconds for the queue thread selected by \p thread_index — confirm
 * against the implementation.
 */
int64_t util_queue_get_thread_time_nano(struct util_queue *queue,
                                        unsigned thread_index);
251 
252 /* util_queue needs to be cleared to zeroes for this to work */
253 static inline bool
util_queue_is_initialized(struct util_queue * queue)254 util_queue_is_initialized(struct util_queue *queue)
255 {
256    return queue->threads != NULL;
257 }
258 
/* Convenient structure for monitoring the queue externally and passing
 * the structure between Mesa components. The queue doesn't use it directly.
 */
struct util_queue_monitoring
{
   /* For querying the thread busyness. */
   struct util_queue *queue;

   /* Counters updated by the user of the queue. */
   unsigned num_offloaded_items;
   unsigned num_direct_items;
   unsigned num_syncs;
};
272 
273 #ifdef __cplusplus
274 }
275 #endif
276 
277 #endif
278