1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 // THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 // OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 // SOFTWARE.
21 //
22 
23 #include <cstring>
24 
25 #include "api/util.hpp"
26 #include "core/event.hpp"
27 #include "core/resource.hpp"
28 
29 using namespace clover;
30 
31 namespace {
32    typedef resource::point point;
33 
34    ///
35    /// Common argument checking shared by memory transfer commands.
36    ///
37    void
validate_base(cl_command_queue q,cl_uint num_deps,const cl_event * deps)38    validate_base(cl_command_queue q, cl_uint num_deps, const cl_event *deps) {
39       if (!q)
40          throw error(CL_INVALID_COMMAND_QUEUE);
41 
42       if (bool(num_deps) != bool(deps) ||
43           any_of(is_zero<cl_event>(), deps, deps + num_deps))
44          throw error(CL_INVALID_EVENT_WAIT_LIST);
45 
46       if (any_of([&](const cl_event ev) {
47                return &ev->ctx != &q->ctx;
48             }, deps, deps + num_deps))
49          throw error(CL_INVALID_CONTEXT);
50    }
51 
52    ///
53    /// Memory object-specific argument checking shared by most memory
54    /// transfer commands.
55    ///
56    void
validate_obj(cl_command_queue q,cl_mem obj)57    validate_obj(cl_command_queue q, cl_mem obj) {
58       if (!obj)
59          throw error(CL_INVALID_MEM_OBJECT);
60 
61       if (&obj->ctx != &q->ctx)
62          throw error(CL_INVALID_CONTEXT);
63    }
64 
65    ///
66    /// Class that encapsulates the task of mapping an object of type
67    /// \a T.  The return value of get() should be implicitly
68    /// convertible to \a void *.
69    ///
70    template<typename T> struct __map;
71 
72    template<> struct __map<void *> {
73       static void *
get__anon8ee564750111::__map74       get(cl_command_queue q, void *obj, cl_map_flags flags,
75           size_t offset, size_t size) {
76          return (char *)obj + offset;
77       }
78    };
79 
80    template<> struct __map<const void *> {
81       static const void *
get__anon8ee564750111::__map82       get(cl_command_queue q, const void *obj, cl_map_flags flags,
83           size_t offset, size_t size) {
84          return (const char *)obj + offset;
85       }
86    };
87 
88    template<> struct __map<memory_obj *> {
89       static mapping
get__anon8ee564750111::__map90       get(cl_command_queue q, memory_obj *obj, cl_map_flags flags,
91           size_t offset, size_t size) {
92          return { *q, obj->resource(q), flags, true, { offset }, { size }};
93       }
94    };
95 
96    ///
97    /// Software copy from \a src_obj to \a dst_obj.  They can be
98    /// either pointers or memory objects.
99    ///
100    template<typename T, typename S>
101    std::function<void (event &)>
soft_copy_op(cl_command_queue q,T dst_obj,const point & dst_orig,const point & dst_pitch,S src_obj,const point & src_orig,const point & src_pitch,const point & region)102    soft_copy_op(cl_command_queue q,
103                 T dst_obj, const point &dst_orig, const point &dst_pitch,
104                 S src_obj, const point &src_orig, const point &src_pitch,
105                 const point &region) {
106       return [=](event &) {
107          auto dst = __map<T>::get(q, dst_obj, CL_MAP_WRITE,
108                                   dst_pitch(dst_orig), dst_pitch(region));
109          auto src = __map<S>::get(q, src_obj, CL_MAP_READ,
110                                   src_pitch(src_orig), src_pitch(region));
111          point p;
112 
113          for (p[2] = 0; p[2] < region[2]; ++p[2]) {
114             for (p[1] = 0; p[1] < region[1]; ++p[1]) {
115                std::memcpy(static_cast<char *>(dst) + dst_pitch(p),
116                            static_cast<const char *>(src) + src_pitch(p),
117                            src_pitch[0] * region[0]);
118             }
119          }
120       };
121    }
122 
123    ///
124    /// Hardware copy from \a src_obj to \a dst_obj.
125    ///
126    template<typename T, typename S>
127    std::function<void (event &)>
hard_copy_op(cl_command_queue q,T dst_obj,const point & dst_orig,S src_obj,const point & src_orig,const point & region)128    hard_copy_op(cl_command_queue q, T dst_obj, const point &dst_orig,
129                 S src_obj, const point &src_orig, const point &region) {
130       return [=](event &) {
131          dst_obj->resource(q).copy(*q, dst_orig, region,
132                                    src_obj->resource(q), src_orig);
133       };
134    }
135 }
136 
137 PUBLIC cl_int
clEnqueueReadBuffer(cl_command_queue q,cl_mem obj,cl_bool blocking,size_t offset,size_t size,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)138 clEnqueueReadBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
139                     size_t offset, size_t size, void *ptr,
140                     cl_uint num_deps, const cl_event *deps,
141                     cl_event *ev) try {
142    validate_base(q, num_deps, deps);
143    validate_obj(q, obj);
144 
145    if (!ptr || offset > obj->size() || offset + size > obj->size())
146       throw error(CL_INVALID_VALUE);
147 
148    hard_event *hev = new hard_event(
149       *q, CL_COMMAND_READ_BUFFER, { deps, deps + num_deps },
150       soft_copy_op(q,
151                    ptr, { 0 }, { 1 },
152                    obj, { offset }, { 1 },
153                    { size, 1, 1 }));
154 
155    ret_object(ev, hev);
156    return CL_SUCCESS;
157 
158 } catch (error &e) {
159    return e.get();
160 }
161 
162 PUBLIC cl_int
clEnqueueWriteBuffer(cl_command_queue q,cl_mem obj,cl_bool blocking,size_t offset,size_t size,const void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)163 clEnqueueWriteBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
164                      size_t offset, size_t size, const void *ptr,
165                      cl_uint num_deps, const cl_event *deps,
166                      cl_event *ev) try {
167    validate_base(q, num_deps, deps);
168    validate_obj(q, obj);
169 
170    if (!ptr || offset > obj->size() || offset + size > obj->size())
171       throw error(CL_INVALID_VALUE);
172 
173    hard_event *hev = new hard_event(
174       *q, CL_COMMAND_WRITE_BUFFER, { deps, deps + num_deps },
175       soft_copy_op(q,
176                    obj, { offset }, { 1 },
177                    ptr, { 0 }, { 1 },
178                    { size, 1, 1 }));
179 
180    ret_object(ev, hev);
181    return CL_SUCCESS;
182 
183 } catch (error &e) {
184    return e.get();
185 }
186 
187 PUBLIC cl_int
clEnqueueReadBufferRect(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * obj_origin,const size_t * host_origin,const size_t * region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)188 clEnqueueReadBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
189                         const size_t *obj_origin, const size_t *host_origin,
190                         const size_t *region,
191                         size_t obj_row_pitch, size_t obj_slice_pitch,
192                         size_t host_row_pitch, size_t host_slice_pitch,
193                         void *ptr,
194                         cl_uint num_deps, const cl_event *deps,
195                         cl_event *ev) try {
196    validate_base(q, num_deps, deps);
197    validate_obj(q, obj);
198 
199    if (!ptr)
200       throw error(CL_INVALID_VALUE);
201 
202    hard_event *hev = new hard_event(
203       *q, CL_COMMAND_READ_BUFFER_RECT, { deps, deps + num_deps },
204       soft_copy_op(q,
205                    ptr, host_origin,
206                    { 1, host_row_pitch, host_slice_pitch },
207                    obj, obj_origin,
208                    { 1, obj_row_pitch, obj_slice_pitch },
209                    region));
210 
211    ret_object(ev, hev);
212    return CL_SUCCESS;
213 
214 } catch (error &e) {
215    return e.get();
216 }
217 
218 PUBLIC cl_int
clEnqueueWriteBufferRect(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * obj_origin,const size_t * host_origin,const size_t * region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)219 clEnqueueWriteBufferRect(cl_command_queue q, cl_mem obj, cl_bool blocking,
220                          const size_t *obj_origin, const size_t *host_origin,
221                          const size_t *region,
222                          size_t obj_row_pitch, size_t obj_slice_pitch,
223                          size_t host_row_pitch, size_t host_slice_pitch,
224                          const void *ptr,
225                          cl_uint num_deps, const cl_event *deps,
226                          cl_event *ev) try {
227    validate_base(q, num_deps, deps);
228    validate_obj(q, obj);
229 
230    if (!ptr)
231       throw error(CL_INVALID_VALUE);
232 
233    hard_event *hev = new hard_event(
234       *q, CL_COMMAND_WRITE_BUFFER_RECT, { deps, deps + num_deps },
235       soft_copy_op(q,
236                    obj, obj_origin,
237                    { 1, obj_row_pitch, obj_slice_pitch },
238                    ptr, host_origin,
239                    { 1, host_row_pitch, host_slice_pitch },
240                    region));
241 
242    ret_object(ev, hev);
243    return CL_SUCCESS;
244 
245 } catch (error &e) {
246    return e.get();
247 }
248 
249 PUBLIC cl_int
clEnqueueCopyBuffer(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,size_t src_offset,size_t dst_offset,size_t size,cl_uint num_deps,const cl_event * deps,cl_event * ev)250 clEnqueueCopyBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
251                     size_t src_offset, size_t dst_offset, size_t size,
252                     cl_uint num_deps, const cl_event *deps,
253                     cl_event *ev) try {
254    validate_base(q, num_deps, deps);
255    validate_obj(q, src_obj);
256    validate_obj(q, dst_obj);
257 
258    hard_event *hev = new hard_event(
259       *q, CL_COMMAND_COPY_BUFFER, { deps, deps + num_deps },
260       hard_copy_op(q, dst_obj, { dst_offset },
261                    src_obj, { src_offset },
262                    { size, 1, 1 }));
263 
264    ret_object(ev, hev);
265    return CL_SUCCESS;
266 
267 } catch (error &e) {
268    return e.get();
269 }
270 
271 PUBLIC cl_int
clEnqueueCopyBufferRect(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,const size_t * src_origin,const size_t * dst_origin,const size_t * region,size_t src_row_pitch,size_t src_slice_pitch,size_t dst_row_pitch,size_t dst_slice_pitch,cl_uint num_deps,const cl_event * deps,cl_event * ev)272 clEnqueueCopyBufferRect(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
273                         const size_t *src_origin, const size_t *dst_origin,
274                         const size_t *region,
275                         size_t src_row_pitch, size_t src_slice_pitch,
276                         size_t dst_row_pitch, size_t dst_slice_pitch,
277                         cl_uint num_deps, const cl_event *deps,
278                         cl_event *ev) try {
279    validate_base(q, num_deps, deps);
280    validate_obj(q, src_obj);
281    validate_obj(q, dst_obj);
282 
283    hard_event *hev = new hard_event(
284       *q, CL_COMMAND_COPY_BUFFER_RECT, { deps, deps + num_deps },
285       soft_copy_op(q,
286                    dst_obj, dst_origin,
287                    { 1, dst_row_pitch, dst_slice_pitch },
288                    src_obj, src_origin,
289                    { 1, src_row_pitch, src_slice_pitch },
290                    region));
291 
292    ret_object(ev, hev);
293    return CL_SUCCESS;
294 
295 } catch (error &e) {
296    return e.get();
297 }
298 
299 PUBLIC cl_int
clEnqueueReadImage(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * origin,const size_t * region,size_t row_pitch,size_t slice_pitch,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)300 clEnqueueReadImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
301                    const size_t *origin, const size_t *region,
302                    size_t row_pitch, size_t slice_pitch, void *ptr,
303                    cl_uint num_deps, const cl_event *deps,
304                    cl_event *ev) try {
305    image *img = dynamic_cast<image *>(obj);
306 
307    validate_base(q, num_deps, deps);
308    validate_obj(q, img);
309 
310    if (!ptr)
311       throw error(CL_INVALID_VALUE);
312 
313    hard_event *hev = new hard_event(
314       *q, CL_COMMAND_READ_IMAGE, { deps, deps + num_deps },
315       soft_copy_op(q,
316                    ptr, {},
317                    { 1, row_pitch, slice_pitch },
318                    obj, origin,
319                    { 1, img->row_pitch(), img->slice_pitch() },
320                    region));
321 
322    ret_object(ev, hev);
323    return CL_SUCCESS;
324 
325 } catch (error &e) {
326    return e.get();
327 }
328 
329 PUBLIC cl_int
clEnqueueWriteImage(cl_command_queue q,cl_mem obj,cl_bool blocking,const size_t * origin,const size_t * region,size_t row_pitch,size_t slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)330 clEnqueueWriteImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
331                     const size_t *origin, const size_t *region,
332                     size_t row_pitch, size_t slice_pitch, const void *ptr,
333                     cl_uint num_deps, const cl_event *deps,
334                     cl_event *ev) try {
335    image *img = dynamic_cast<image *>(obj);
336 
337    validate_base(q, num_deps, deps);
338    validate_obj(q, img);
339 
340    if (!ptr)
341       throw error(CL_INVALID_VALUE);
342 
343    hard_event *hev = new hard_event(
344       *q, CL_COMMAND_WRITE_IMAGE, { deps, deps + num_deps },
345       soft_copy_op(q,
346                    obj, origin,
347                    { 1, img->row_pitch(), img->slice_pitch() },
348                    ptr, {},
349                    { 1, row_pitch, slice_pitch },
350                    region));
351 
352    ret_object(ev, hev);
353    return CL_SUCCESS;
354 
355 } catch (error &e) {
356    return e.get();
357 }
358 
359 PUBLIC cl_int
clEnqueueCopyImage(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,const size_t * src_origin,const size_t * dst_origin,const size_t * region,cl_uint num_deps,const cl_event * deps,cl_event * ev)360 clEnqueueCopyImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
361                    const size_t *src_origin, const size_t *dst_origin,
362                    const size_t *region,
363                    cl_uint num_deps, const cl_event *deps,
364                    cl_event *ev) try {
365    image *src_img = dynamic_cast<image *>(src_obj);
366    image *dst_img = dynamic_cast<image *>(dst_obj);
367 
368    validate_base(q, num_deps, deps);
369    validate_obj(q, src_img);
370    validate_obj(q, dst_img);
371 
372    hard_event *hev = new hard_event(
373       *q, CL_COMMAND_COPY_IMAGE, { deps, deps + num_deps },
374       hard_copy_op(q, dst_obj, dst_origin, src_obj, src_origin, region));
375 
376    ret_object(ev, hev);
377    return CL_SUCCESS;
378 
379 } catch (error &e) {
380    return e.get();
381 }
382 
383 PUBLIC cl_int
clEnqueueCopyImageToBuffer(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,const size_t * src_origin,const size_t * region,size_t dst_offset,cl_uint num_deps,const cl_event * deps,cl_event * ev)384 clEnqueueCopyImageToBuffer(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
385                            const size_t *src_origin, const size_t *region,
386                            size_t dst_offset,
387                            cl_uint num_deps, const cl_event *deps,
388                            cl_event *ev) try {
389    image *src_img = dynamic_cast<image *>(src_obj);
390 
391    validate_base(q, num_deps, deps);
392    validate_obj(q, src_img);
393    validate_obj(q, dst_obj);
394 
395    hard_event *hev = new hard_event(
396       *q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, { deps, deps + num_deps },
397       soft_copy_op(q,
398                    dst_obj, { dst_offset },
399                    { 0, 0, 0 },
400                    src_obj, src_origin,
401                    { 1, src_img->row_pitch(), src_img->slice_pitch() },
402                    region));
403 
404    ret_object(ev, hev);
405    return CL_SUCCESS;
406 
407 } catch (error &e) {
408    return e.get();
409 }
410 
411 PUBLIC cl_int
clEnqueueCopyBufferToImage(cl_command_queue q,cl_mem src_obj,cl_mem dst_obj,size_t src_offset,const size_t * dst_origin,const size_t * region,cl_uint num_deps,const cl_event * deps,cl_event * ev)412 clEnqueueCopyBufferToImage(cl_command_queue q, cl_mem src_obj, cl_mem dst_obj,
413                            size_t src_offset,
414                            const size_t *dst_origin, const size_t *region,
415                            cl_uint num_deps, const cl_event *deps,
416                            cl_event *ev) try {
417    image *dst_img = dynamic_cast<image *>(src_obj);
418 
419    validate_base(q, num_deps, deps);
420    validate_obj(q, src_obj);
421    validate_obj(q, dst_img);
422 
423    hard_event *hev = new hard_event(
424       *q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, { deps, deps + num_deps },
425       soft_copy_op(q,
426                    dst_obj, dst_origin,
427                    { 1, dst_img->row_pitch(), dst_img->slice_pitch() },
428                    src_obj, { src_offset },
429                    { 0, 0, 0 },
430                    region));
431 
432    ret_object(ev, hev);
433    return CL_SUCCESS;
434 
435 } catch (error &e) {
436    return e.get();
437 }
438 
439 PUBLIC void *
clEnqueueMapBuffer(cl_command_queue q,cl_mem obj,cl_bool blocking,cl_map_flags flags,size_t offset,size_t size,cl_uint num_deps,const cl_event * deps,cl_event * ev,cl_int * errcode_ret)440 clEnqueueMapBuffer(cl_command_queue q, cl_mem obj, cl_bool blocking,
441                    cl_map_flags flags, size_t offset, size_t size,
442                    cl_uint num_deps, const cl_event *deps,
443                    cl_event *ev, cl_int *errcode_ret) try {
444    validate_base(q, num_deps, deps);
445    validate_obj(q, obj);
446 
447    if (offset > obj->size() || offset + size > obj->size())
448       throw error(CL_INVALID_VALUE);
449 
450    void *map = obj->resource(q).add_map(
451       *q, flags, blocking, { offset }, { size });
452 
453    ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_BUFFER,
454                                  { deps, deps + num_deps }));
455    ret_error(errcode_ret, CL_SUCCESS);
456    return map;
457 
458 } catch (error &e) {
459    ret_error(errcode_ret, e);
460    return NULL;
461 }
462 
463 PUBLIC void *
clEnqueueMapImage(cl_command_queue q,cl_mem obj,cl_bool blocking,cl_map_flags flags,const size_t * origin,const size_t * region,size_t * row_pitch,size_t * slice_pitch,cl_uint num_deps,const cl_event * deps,cl_event * ev,cl_int * errcode_ret)464 clEnqueueMapImage(cl_command_queue q, cl_mem obj, cl_bool blocking,
465                   cl_map_flags flags,
466                   const size_t *origin, const size_t *region,
467                   size_t *row_pitch, size_t *slice_pitch,
468                   cl_uint num_deps, const cl_event *deps,
469                   cl_event *ev, cl_int *errcode_ret) try {
470    image *img = dynamic_cast<image *>(obj);
471 
472    validate_base(q, num_deps, deps);
473    validate_obj(q, img);
474 
475    void *map = obj->resource(q).add_map(
476       *q, flags, blocking, origin, region);
477 
478    ret_object(ev, new hard_event(*q, CL_COMMAND_MAP_IMAGE,
479                                  { deps, deps + num_deps }));
480    ret_error(errcode_ret, CL_SUCCESS);
481    return map;
482 
483 } catch (error &e) {
484    ret_error(errcode_ret, e);
485    return NULL;
486 }
487 
488 PUBLIC cl_int
clEnqueueUnmapMemObject(cl_command_queue q,cl_mem obj,void * ptr,cl_uint num_deps,const cl_event * deps,cl_event * ev)489 clEnqueueUnmapMemObject(cl_command_queue q, cl_mem obj, void *ptr,
490                         cl_uint num_deps, const cl_event *deps,
491                         cl_event *ev) try {
492    validate_base(q, num_deps, deps);
493    validate_obj(q, obj);
494 
495    hard_event *hev = new hard_event(
496       *q, CL_COMMAND_UNMAP_MEM_OBJECT, { deps, deps + num_deps },
497       [=](event &) {
498          obj->resource(q).del_map(ptr);
499       });
500 
501    ret_object(ev, hev);
502    return CL_SUCCESS;
503 
504 } catch (error &e) {
505    return e.get();
506 }
507