1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 // SOFTWARE.
21 //
22 
23 #include "core/kernel.hpp"
24 #include "core/resource.hpp"
25 #include "pipe/p_context.h"
26 
27 using namespace clover;
28 
_cl_kernel(clover::program & prog,const std::string & name,const std::vector<clover::module::argument> & args)29 _cl_kernel::_cl_kernel(clover::program &prog,
30                        const std::string &name,
31                        const std::vector<clover::module::argument> &args) :
32    prog(prog), __name(name), exec(*this) {
33    for (auto arg : args) {
34       if (arg.type == module::argument::scalar)
35          this->args.emplace_back(new scalar_argument(arg.size));
36       else if (arg.type == module::argument::global)
37          this->args.emplace_back(new global_argument(arg.size));
38       else if (arg.type == module::argument::local)
39          this->args.emplace_back(new local_argument());
40       else if (arg.type == module::argument::constant)
41          this->args.emplace_back(new constant_argument());
42       else if (arg.type == module::argument::image2d_rd ||
43                arg.type == module::argument::image3d_rd)
44          this->args.emplace_back(new image_rd_argument());
45       else if (arg.type == module::argument::image2d_wr ||
46                arg.type == module::argument::image3d_wr)
47          this->args.emplace_back(new image_wr_argument());
48       else if (arg.type == module::argument::sampler)
49          this->args.emplace_back(new sampler_argument());
50       else
51          throw error(CL_INVALID_KERNEL_DEFINITION);
52    }
53 }
54 
55 template<typename T, typename V>
56 static inline std::vector<T>
pad_vector(clover::command_queue & q,const V & v,T x)57 pad_vector(clover::command_queue &q, const V &v, T x) {
58    std::vector<T> w { v.begin(), v.end() };
59    w.resize(q.dev.max_block_size().size(), x);
60    return w;
61 }
62 
63 void
launch(clover::command_queue & q,const std::vector<size_t> & grid_offset,const std::vector<size_t> & grid_size,const std::vector<size_t> & block_size)64 _cl_kernel::launch(clover::command_queue &q,
65                    const std::vector<size_t> &grid_offset,
66                    const std::vector<size_t> &grid_size,
67                    const std::vector<size_t> &block_size) {
68    void *st = exec.bind(&q);
69    auto g_handles = map([&](size_t h) { return (uint32_t *)&exec.input[h]; },
70                         exec.g_handles.begin(), exec.g_handles.end());
71 
72    q.pipe->bind_compute_state(q.pipe, st);
73    q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(),
74                                        exec.samplers.data());
75    q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(),
76                                      exec.sviews.data());
77    q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
78                                      exec.resources.data());
79    q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
80                               exec.g_buffers.data(), g_handles.data());
81 
82    q.pipe->launch_grid(q.pipe,
83                        pad_vector<uint>(q, block_size, 1).data(),
84                        pad_vector<uint>(q, grid_size, 1).data(),
85                        module(q).sym(__name).offset,
86                        exec.input.data());
87 
88    q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
89    q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
90    q.pipe->set_compute_sampler_views(q.pipe, 0, exec.sviews.size(), NULL);
91    q.pipe->bind_compute_sampler_states(q.pipe, 0, exec.samplers.size(), NULL);
92    exec.unbind();
93 }
94 
95 size_t
mem_local() const96 _cl_kernel::mem_local() const {
97    size_t sz = 0;
98 
99    for (auto &arg : args) {
100       if (dynamic_cast<local_argument *>(arg.get()))
101          sz += arg->storage();
102    }
103 
104    return sz;
105 }
106 
107 size_t
mem_private() const108 _cl_kernel::mem_private() const {
109    return 0;
110 }
111 
112 size_t
max_block_size() const113 _cl_kernel::max_block_size() const {
114    return SIZE_MAX;
115 }
116 
117 const std::string &
name() const118 _cl_kernel::name() const {
119    return __name;
120 }
121 
122 std::vector<size_t>
block_size() const123 _cl_kernel::block_size() const {
124    return { 0, 0, 0 };
125 }
126 
127 const clover::module &
module(const clover::command_queue & q) const128 _cl_kernel::module(const clover::command_queue &q) const {
129    return prog.binaries().find(&q.dev)->second;
130 }
131 
132 
exec_context(clover::kernel & kern)133 _cl_kernel::exec_context::exec_context(clover::kernel &kern) :
134    kern(kern), q(NULL), mem_local(0), st(NULL) {
135 }
136 
~exec_context()137 _cl_kernel::exec_context::~exec_context() {
138    if (st)
139       q->pipe->delete_compute_state(q->pipe, st);
140 }
141 
142 void *
bind(clover::command_queue * __q)143 _cl_kernel::exec_context::bind(clover::command_queue *__q) {
144    std::swap(q, __q);
145 
146    for (auto &arg : kern.args)
147       arg->bind(*this);
148 
149    // Create a new compute state if anything changed.
150    if (!st || q != __q ||
151        cs.req_local_mem != mem_local ||
152        cs.req_input_mem != input.size()) {
153       if (st)
154          __q->pipe->delete_compute_state(__q->pipe, st);
155 
156       cs.prog = kern.module(*q).sec(module::section::text).data.begin();
157       cs.req_local_mem = mem_local;
158       cs.req_input_mem = input.size();
159       st = q->pipe->create_compute_state(q->pipe, &cs);
160    }
161 
162    return st;
163 }
164 
165 void
unbind()166 _cl_kernel::exec_context::unbind() {
167    for (auto &arg : kern.args)
168       arg->unbind(*this);
169 
170    input.clear();
171    samplers.clear();
172    sviews.clear();
173    resources.clear();
174    g_buffers.clear();
175    g_handles.clear();
176    mem_local = 0;
177 }
178 
argument(size_t size)179 _cl_kernel::argument::argument(size_t size) :
180    __size(size), __set(false) {
181 }
182 
183 bool
set() const184 _cl_kernel::argument::set() const {
185    return __set;
186 }
187 
188 size_t
storage() const189 _cl_kernel::argument::storage() const {
190    return 0;
191 }
192 
scalar_argument(size_t size)193 _cl_kernel::scalar_argument::scalar_argument(size_t size) :
194    argument(size) {
195 }
196 
197 void
set(size_t size,const void * value)198 _cl_kernel::scalar_argument::set(size_t size, const void *value) {
199    if (size != __size)
200       throw error(CL_INVALID_ARG_SIZE);
201 
202    v = { (uint8_t *)value, (uint8_t *)value + size };
203    __set = true;
204 }
205 
206 void
bind(exec_context & ctx)207 _cl_kernel::scalar_argument::bind(exec_context &ctx) {
208    ctx.input.insert(ctx.input.end(), v.begin(), v.end());
209 }
210 
211 void
unbind(exec_context & ctx)212 _cl_kernel::scalar_argument::unbind(exec_context &ctx) {
213 }
214 
global_argument(size_t size)215 _cl_kernel::global_argument::global_argument(size_t size) :
216    argument(size) {
217 }
218 
219 void
set(size_t size,const void * value)220 _cl_kernel::global_argument::set(size_t size, const void *value) {
221    if (size != sizeof(cl_mem))
222       throw error(CL_INVALID_ARG_SIZE);
223 
224    obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
225    __set = true;
226 }
227 
228 void
bind(exec_context & ctx)229 _cl_kernel::global_argument::bind(exec_context &ctx) {
230    size_t offset = ctx.input.size();
231    size_t idx = ctx.g_buffers.size();
232 
233    ctx.input.resize(offset + __size);
234 
235    ctx.g_buffers.resize(idx + 1);
236    ctx.g_buffers[idx] = obj->resource(ctx.q).pipe;
237 
238    ctx.g_handles.resize(idx + 1);
239    ctx.g_handles[idx] = offset;
240 }
241 
242 void
unbind(exec_context & ctx)243 _cl_kernel::global_argument::unbind(exec_context &ctx) {
244 }
245 
local_argument()246 _cl_kernel::local_argument::local_argument() :
247    argument(sizeof(uint32_t)) {
248 }
249 
250 size_t
storage() const251 _cl_kernel::local_argument::storage() const {
252    return __storage;
253 }
254 
255 void
set(size_t size,const void * value)256 _cl_kernel::local_argument::set(size_t size, const void *value) {
257    if (value)
258       throw error(CL_INVALID_ARG_VALUE);
259 
260    __storage = size;
261    __set = true;
262 }
263 
264 void
bind(exec_context & ctx)265 _cl_kernel::local_argument::bind(exec_context &ctx) {
266    size_t offset = ctx.input.size();
267    size_t ptr = ctx.mem_local;
268 
269    ctx.input.resize(offset + sizeof(uint32_t));
270    *(uint32_t *)&ctx.input[offset] = ptr;
271 
272    ctx.mem_local += __storage;
273 }
274 
275 void
unbind(exec_context & ctx)276 _cl_kernel::local_argument::unbind(exec_context &ctx) {
277 }
278 
constant_argument()279 _cl_kernel::constant_argument::constant_argument() :
280    argument(sizeof(uint32_t)) {
281 }
282 
283 void
set(size_t size,const void * value)284 _cl_kernel::constant_argument::set(size_t size, const void *value) {
285    if (size != sizeof(cl_mem))
286       throw error(CL_INVALID_ARG_SIZE);
287 
288    obj = dynamic_cast<clover::buffer *>(*(cl_mem *)value);
289    __set = true;
290 }
291 
292 void
bind(exec_context & ctx)293 _cl_kernel::constant_argument::bind(exec_context &ctx) {
294    size_t offset = ctx.input.size();
295    size_t idx = ctx.resources.size();
296 
297    ctx.input.resize(offset + sizeof(uint32_t));
298    *(uint32_t *)&ctx.input[offset] = idx << 24;
299 
300    ctx.resources.resize(idx + 1);
301    ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, false);
302 }
303 
304 void
unbind(exec_context & ctx)305 _cl_kernel::constant_argument::unbind(exec_context &ctx) {
306    obj->resource(ctx.q).unbind_surface(*ctx.q, st);
307 }
308 
image_rd_argument()309 _cl_kernel::image_rd_argument::image_rd_argument() :
310    argument(sizeof(uint32_t)) {
311 }
312 
313 void
set(size_t size,const void * value)314 _cl_kernel::image_rd_argument::set(size_t size, const void *value) {
315    if (size != sizeof(cl_mem))
316       throw error(CL_INVALID_ARG_SIZE);
317 
318    obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
319    __set = true;
320 }
321 
322 void
bind(exec_context & ctx)323 _cl_kernel::image_rd_argument::bind(exec_context &ctx) {
324    size_t offset = ctx.input.size();
325    size_t idx = ctx.sviews.size();
326 
327    ctx.input.resize(offset + sizeof(uint32_t));
328    *(uint32_t *)&ctx.input[offset] = idx;
329 
330    ctx.sviews.resize(idx + 1);
331    ctx.sviews[idx] = st = obj->resource(ctx.q).bind_sampler_view(*ctx.q);
332 }
333 
334 void
unbind(exec_context & ctx)335 _cl_kernel::image_rd_argument::unbind(exec_context &ctx) {
336    obj->resource(ctx.q).unbind_sampler_view(*ctx.q, st);
337 }
338 
image_wr_argument()339 _cl_kernel::image_wr_argument::image_wr_argument() :
340    argument(sizeof(uint32_t)) {
341 }
342 
343 void
set(size_t size,const void * value)344 _cl_kernel::image_wr_argument::set(size_t size, const void *value) {
345    if (size != sizeof(cl_mem))
346       throw error(CL_INVALID_ARG_SIZE);
347 
348    obj = dynamic_cast<clover::image *>(*(cl_mem *)value);
349    __set = true;
350 }
351 
352 void
bind(exec_context & ctx)353 _cl_kernel::image_wr_argument::bind(exec_context &ctx) {
354    size_t offset = ctx.input.size();
355    size_t idx = ctx.resources.size();
356 
357    ctx.input.resize(offset + sizeof(uint32_t));
358    *(uint32_t *)&ctx.input[offset] = idx;
359 
360    ctx.resources.resize(idx + 1);
361    ctx.resources[idx] = st = obj->resource(ctx.q).bind_surface(*ctx.q, true);
362 }
363 
364 void
unbind(exec_context & ctx)365 _cl_kernel::image_wr_argument::unbind(exec_context &ctx) {
366    obj->resource(ctx.q).unbind_surface(*ctx.q, st);
367 }
368 
sampler_argument()369 _cl_kernel::sampler_argument::sampler_argument() :
370    argument(0) {
371 }
372 
373 void
set(size_t size,const void * value)374 _cl_kernel::sampler_argument::set(size_t size, const void *value) {
375    if (size != sizeof(cl_sampler))
376       throw error(CL_INVALID_ARG_SIZE);
377 
378    obj = *(cl_sampler *)value;
379    __set = true;
380 }
381 
382 void
bind(exec_context & ctx)383 _cl_kernel::sampler_argument::bind(exec_context &ctx) {
384    size_t idx = ctx.samplers.size();
385 
386    ctx.samplers.resize(idx + 1);
387    ctx.samplers[idx] = st = obj->bind(*ctx.q);
388 }
389 
390 void
unbind(exec_context & ctx)391 _cl_kernel::sampler_argument::unbind(exec_context &ctx) {
392    obj->unbind(*ctx.q, st);
393 }
394