1 //
2 // Copyright 2012 Francisco Jerez
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a
5 // copy of this software and associated documentation files (the "Software"),
6 // to deal in the Software without restriction, including without limitation
7 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 // and/or sell copies of the Software, and to permit persons to whom the
9 // Software is furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 // OTHER DEALINGS IN THE SOFTWARE.
21 //
22 
23 #include <cstring>
24 
25 #include "api/util.hpp"
26 #include "core/event.hpp"
27 #include "core/memory.hpp"
28 
29 using namespace clover;
30 
31 namespace {
32    typedef resource::vector vector_t;
33 
34    vector_t
vector(const size_t * p)35    vector(const size_t *p) {
36       return range(p, 3);
37    }
38 
39    vector_t
pitch(const vector_t & region,vector_t pitch)40    pitch(const vector_t &region, vector_t pitch) {
41       for (auto x : zip(tail(pitch),
42                         map(multiplies(), region, pitch))) {
43          // The spec defines a value of zero as the natural pitch,
44          // i.e. the unaligned size of the previous dimension.
45          if (std::get<0>(x) == 0)
46             std::get<0>(x) = std::get<1>(x);
47       }
48 
49       return pitch;
50    }
51 
52    ///
53    /// Size of a region in bytes.
54    ///
55    size_t
size(const vector_t & pitch,const vector_t & region)56    size(const vector_t &pitch, const vector_t &region) {
57       if (any_of(is_zero(), region))
58          return 0;
59       else
60          return dot(pitch, region - vector_t{ 0, 1, 1 });
61    }
62 
63    ///
64    /// Common argument checking shared by memory transfer commands.
65    ///
66    void
validate_common(command_queue & q,const ref_vector<event> & deps)67    validate_common(command_queue &q,
68                    const ref_vector<event> &deps) {
69       if (any_of([&](const event &ev) {
70                return ev.context() != q.context();
71             }, deps))
72          throw error(CL_INVALID_CONTEXT);
73    }
74 
75    ///
76    /// Common error checking for a buffer object argument.
77    ///
78    void
validate_object(command_queue & q,buffer & mem,const vector_t & origin,const vector_t & pitch,const vector_t & region)79    validate_object(command_queue &q, buffer &mem, const vector_t &origin,
80                    const vector_t &pitch, const vector_t &region) {
81       if (mem.context() != q.context())
82          throw error(CL_INVALID_CONTEXT);
83 
84       // The region must fit within the specified pitch,
85       if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
86          throw error(CL_INVALID_VALUE);
87 
88       // ...and within the specified object.
89       if (dot(pitch, origin) + size(pitch, region) > mem.size())
90          throw error(CL_INVALID_VALUE);
91 
92       if (any_of(is_zero(), region))
93          throw error(CL_INVALID_VALUE);
94    }
95 
96    ///
97    /// Common error checking for an image argument.
98    ///
99    void
validate_object(command_queue & q,image & img,const vector_t & orig,const vector_t & region)100    validate_object(command_queue &q, image &img,
101                    const vector_t &orig, const vector_t &region) {
102       vector_t size = { img.width(), img.height(), img.depth() };
103 
104       if (!q.device().image_support())
105          throw error(CL_INVALID_OPERATION);
106 
107       if (img.context() != q.context())
108          throw error(CL_INVALID_CONTEXT);
109 
110       if (any_of(greater(), orig + region, size))
111          throw error(CL_INVALID_VALUE);
112 
113       if (any_of(is_zero(), region))
114          throw error(CL_INVALID_VALUE);
115    }
116 
117    ///
118    /// Common error checking for a host pointer argument.
119    ///
120    void
validate_object(command_queue & q,const void * ptr,const vector_t & orig,const vector_t & pitch,const vector_t & region)121    validate_object(command_queue &q, const void *ptr, const vector_t &orig,
122                    const vector_t &pitch, const vector_t &region) {
123       if (!ptr)
124          throw error(CL_INVALID_VALUE);
125 
126       // The region must fit within the specified pitch.
127       if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
128          throw error(CL_INVALID_VALUE);
129    }
130 
131    ///
132    /// Common argument checking for a copy between two buffer objects.
133    ///
134    void
validate_copy(command_queue & q,buffer & dst_mem,const vector_t & dst_orig,const vector_t & dst_pitch,buffer & src_mem,const vector_t & src_orig,const vector_t & src_pitch,const vector_t & region)135    validate_copy(command_queue &q, buffer &dst_mem,
136                  const vector_t &dst_orig, const vector_t &dst_pitch,
137                  buffer &src_mem,
138                  const vector_t &src_orig, const vector_t &src_pitch,
139                  const vector_t &region) {
140       if (dst_mem == src_mem) {
141          auto dst_offset = dot(dst_pitch, dst_orig);
142          auto src_offset = dot(src_pitch, src_orig);
143 
144          if (interval_overlaps()(
145                 dst_offset, dst_offset + size(dst_pitch, region),
146                 src_offset, src_offset + size(src_pitch, region)))
147             throw error(CL_MEM_COPY_OVERLAP);
148       }
149    }
150 
151    ///
152    /// Common argument checking for a copy between two image objects.
153    ///
154    void
validate_copy(command_queue & q,image & dst_img,const vector_t & dst_orig,image & src_img,const vector_t & src_orig,const vector_t & region)155    validate_copy(command_queue &q,
156                  image &dst_img, const vector_t &dst_orig,
157                  image &src_img, const vector_t &src_orig,
158                  const vector_t &region) {
159       if (dst_img.format() != src_img.format())
160          throw error(CL_IMAGE_FORMAT_MISMATCH);
161 
162       if (dst_img == src_img) {
163          if (all_of(interval_overlaps(),
164                     dst_orig, dst_orig + region,
165                     src_orig, src_orig + region))
166             throw error(CL_MEM_COPY_OVERLAP);
167       }
168    }
169 
170    ///
171    /// Checks that the host access flags of the memory object are
172    /// within the allowed set \a flags.
173    ///
174    void
validate_object_access(const memory_obj & mem,const cl_mem_flags flags)175    validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
176       if (mem.flags() & ~flags &
177           (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
178            CL_MEM_HOST_NO_ACCESS))
179          throw error(CL_INVALID_OPERATION);
180    }
181 
182    ///
183    /// Checks that the mapping flags are correct.
184    ///
185    void
validate_map_flags(const memory_obj & mem,const cl_map_flags flags)186    validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
187       if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
188           (flags & CL_MAP_WRITE_INVALIDATE_REGION))
189          throw error(CL_INVALID_VALUE);
190 
191       if (flags & CL_MAP_READ)
192          validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
193 
194       if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
195          validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
196    }
197 
198    ///
199    /// Class that encapsulates the task of mapping an object of type
200    /// \a T.  The return value of get() should be implicitly
201    /// convertible to \a void *.
202    ///
203    template<typename T>
204    struct _map {
205       static mapping
get__anon8ee564750111::_map206       get(command_queue &q, T obj, cl_map_flags flags,
207           size_t offset, size_t size) {
208          return { q, obj->resource(q), flags, true,
209                   {{ offset }}, {{ size, 1, 1 }} };
210       }
211    };
212 
213    template<>
214    struct _map<void *> {
215       static void *
get__anon8ee564750111::_map216       get(command_queue &q, void *obj, cl_map_flags flags,
217           size_t offset, size_t size) {
218          return (char *)obj + offset;
219       }
220    };
221 
222    template<>
223    struct _map<const void *> {
224       static const void *
get__anon8ee564750111::_map225       get(command_queue &q, const void *obj, cl_map_flags flags,
226           size_t offset, size_t size) {
227          return (const char *)obj + offset;
228       }
229    };
230 
231    ///
232    /// Software copy from \a src_obj to \a dst_obj.  They can be
233    /// either pointers or memory objects.
234    ///
235    template<typename T, typename S>
236    std::function<void (event &)>
soft_copy_op(command_queue & q,T dst_obj,const vector_t & dst_orig,const vector_t & dst_pitch,S src_obj,const vector_t & src_orig,const vector_t & src_pitch,const vector_t & region)237    soft_copy_op(command_queue &q,
238                 T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
239                 S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
240                 const vector_t &region) {
241       return [=, &q](event &) {
242          auto dst = _map<T>::get(q, dst_obj, CL_MAP_WRITE,
243                                  dot(dst_pitch, dst_orig),
244                                  size(dst_pitch, region));
245          auto src = _map<S>::get(q, src_obj, CL_MAP_READ,
246                                  dot(src_pitch, src_orig),
247                                  size(src_pitch, region));
248          vector_t v = {};
249 
250          for (v[2] = 0; v[2] < region[2]; ++v[2]) {
251             for (v[1] = 0; v[1] < region[1]; ++v[1]) {
252                std::memcpy(
253                   static_cast<char *>(dst) + dot(dst_pitch, v),
254                   static_cast<const char *>(src) + dot(src_pitch, v),
255                   src_pitch[0] * region[0]);
256             }
257          }
258       };
259    }
260 
261    ///
262    /// Hardware copy from \a src_obj to \a dst_obj.
263    ///
264    template<typename T, typename S>
265    std::function<void (event &)>
hard_copy_op(command_queue & q,T dst_obj,const vector_t & dst_orig,S src_obj,const vector_t & src_orig,const vector_t & region)266    hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
267                 S src_obj, const vector_t &src_orig, const vector_t &region) {
268       return [=, &q](event &) {
269          dst_obj->resource(q).copy(q, dst_orig, region,
270                                    src_obj->resource(q), src_orig);
271       };
272    }
273 }
274 
275 CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,size_t offset,size_t size,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)276 clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
277                     size_t offset, size_t size, void *ptr,
278                     cl_uint num_deps, const cl_event *d_deps,
279                     cl_event *rd_ev) try {
280    auto &q = obj(d_q);
281    auto &mem = obj<buffer>(d_mem);
282    auto deps = objs<wait_list_tag>(d_deps, num_deps);
283    vector_t region = { size, 1, 1 };
284    vector_t obj_origin = { offset };
285    auto obj_pitch = pitch(region, {{ 1 }});
286 
287    validate_common(q, deps);
288    validate_object(q, ptr, {}, obj_pitch, region);
289    validate_object(q, mem, obj_origin, obj_pitch, region);
290    validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
291 
292    auto hev = create<hard_event>(
293       q, CL_COMMAND_READ_BUFFER, deps,
294       soft_copy_op(q, ptr, {}, obj_pitch,
295                    &mem, obj_origin, obj_pitch,
296                    region));
297 
298    ret_object(rd_ev, hev);
299    return CL_SUCCESS;
300 
301 } catch (error &e) {
302    return e.get();
303 }
304 
305 CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,size_t offset,size_t size,const void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)306 clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
307                      size_t offset, size_t size, const void *ptr,
308                      cl_uint num_deps, const cl_event *d_deps,
309                      cl_event *rd_ev) try {
310    auto &q = obj(d_q);
311    auto &mem = obj<buffer>(d_mem);
312    auto deps = objs<wait_list_tag>(d_deps, num_deps);
313    vector_t region = { size, 1, 1 };
314    vector_t obj_origin = { offset };
315    auto obj_pitch = pitch(region, {{ 1 }});
316 
317    validate_common(q, deps);
318    validate_object(q, mem, obj_origin, obj_pitch, region);
319    validate_object(q, ptr, {}, obj_pitch, region);
320    validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
321 
322    auto hev = create<hard_event>(
323       q, CL_COMMAND_WRITE_BUFFER, deps,
324       soft_copy_op(q, &mem, obj_origin, obj_pitch,
325                    ptr, {}, obj_pitch,
326                    region));
327 
328    ret_object(rd_ev, hev);
329    return CL_SUCCESS;
330 
331 } catch (error &e) {
332    return e.get();
333 }
334 
335 CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_obj_origin,const size_t * p_host_origin,const size_t * p_region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)336 clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
337                         const size_t *p_obj_origin,
338                         const size_t *p_host_origin,
339                         const size_t *p_region,
340                         size_t obj_row_pitch, size_t obj_slice_pitch,
341                         size_t host_row_pitch, size_t host_slice_pitch,
342                         void *ptr,
343                         cl_uint num_deps, const cl_event *d_deps,
344                         cl_event *rd_ev) try {
345    auto &q = obj(d_q);
346    auto &mem = obj<buffer>(d_mem);
347    auto deps = objs<wait_list_tag>(d_deps, num_deps);
348    auto region = vector(p_region);
349    auto obj_origin = vector(p_obj_origin);
350    auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
351    auto host_origin = vector(p_host_origin);
352    auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
353 
354    validate_common(q, deps);
355    validate_object(q, ptr, host_origin, host_pitch, region);
356    validate_object(q, mem, obj_origin, obj_pitch, region);
357    validate_object_access(mem, CL_MEM_HOST_READ_ONLY);
358 
359    auto hev = create<hard_event>(
360       q, CL_COMMAND_READ_BUFFER_RECT, deps,
361       soft_copy_op(q, ptr, host_origin, host_pitch,
362                    &mem, obj_origin, obj_pitch,
363                    region));
364 
365    ret_object(rd_ev, hev);
366    return CL_SUCCESS;
367 
368 } catch (error &e) {
369    return e.get();
370 }
371 
372 CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_obj_origin,const size_t * p_host_origin,const size_t * p_region,size_t obj_row_pitch,size_t obj_slice_pitch,size_t host_row_pitch,size_t host_slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)373 clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
374                          const size_t *p_obj_origin,
375                          const size_t *p_host_origin,
376                          const size_t *p_region,
377                          size_t obj_row_pitch, size_t obj_slice_pitch,
378                          size_t host_row_pitch, size_t host_slice_pitch,
379                          const void *ptr,
380                          cl_uint num_deps, const cl_event *d_deps,
381                          cl_event *rd_ev) try {
382    auto &q = obj(d_q);
383    auto &mem = obj<buffer>(d_mem);
384    auto deps = objs<wait_list_tag>(d_deps, num_deps);
385    auto region = vector(p_region);
386    auto obj_origin = vector(p_obj_origin);
387    auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
388    auto host_origin = vector(p_host_origin);
389    auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});
390 
391    validate_common(q, deps);
392    validate_object(q, mem, obj_origin, obj_pitch, region);
393    validate_object(q, ptr, host_origin, host_pitch, region);
394    validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
395 
396    auto hev = create<hard_event>(
397       q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
398       soft_copy_op(q, &mem, obj_origin, obj_pitch,
399                    ptr, host_origin, host_pitch,
400                    region));
401 
402    ret_object(rd_ev, hev);
403    return CL_SUCCESS;
404 
405 } catch (error &e) {
406    return e.get();
407 }
408 
409 CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,size_t src_offset,size_t dst_offset,size_t size,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)410 clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
411                     size_t src_offset, size_t dst_offset, size_t size,
412                     cl_uint num_deps, const cl_event *d_deps,
413                     cl_event *rd_ev) try {
414    auto &q = obj(d_q);
415    auto &src_mem = obj<buffer>(d_src_mem);
416    auto &dst_mem = obj<buffer>(d_dst_mem);
417    auto deps = objs<wait_list_tag>(d_deps, num_deps);
418    vector_t region = { size, 1, 1 };
419    vector_t dst_origin = { dst_offset };
420    auto dst_pitch = pitch(region, {{ 1 }});
421    vector_t src_origin = { src_offset };
422    auto src_pitch = pitch(region, {{ 1 }});
423 
424    validate_common(q, deps);
425    validate_object(q, dst_mem, dst_origin, dst_pitch, region);
426    validate_object(q, src_mem, src_origin, src_pitch, region);
427    validate_copy(q, dst_mem, dst_origin, dst_pitch,
428                  src_mem, src_origin, src_pitch, region);
429 
430    auto hev = create<hard_event>(
431       q, CL_COMMAND_COPY_BUFFER, deps,
432       hard_copy_op(q, &dst_mem, dst_origin,
433                    &src_mem, src_origin, region));
434 
435    ret_object(rd_ev, hev);
436    return CL_SUCCESS;
437 
438 } catch (error &e) {
439    return e.get();
440 }
441 
442 CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,const size_t * p_src_origin,const size_t * p_dst_origin,const size_t * p_region,size_t src_row_pitch,size_t src_slice_pitch,size_t dst_row_pitch,size_t dst_slice_pitch,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)443 clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
444                         cl_mem d_dst_mem,
445                         const size_t *p_src_origin, const size_t *p_dst_origin,
446                         const size_t *p_region,
447                         size_t src_row_pitch, size_t src_slice_pitch,
448                         size_t dst_row_pitch, size_t dst_slice_pitch,
449                         cl_uint num_deps, const cl_event *d_deps,
450                         cl_event *rd_ev) try {
451    auto &q = obj(d_q);
452    auto &src_mem = obj<buffer>(d_src_mem);
453    auto &dst_mem = obj<buffer>(d_dst_mem);
454    auto deps = objs<wait_list_tag>(d_deps, num_deps);
455    auto region = vector(p_region);
456    auto dst_origin = vector(p_dst_origin);
457    auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
458    auto src_origin = vector(p_src_origin);
459    auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});
460 
461    validate_common(q, deps);
462    validate_object(q, dst_mem, dst_origin, dst_pitch, region);
463    validate_object(q, src_mem, src_origin, src_pitch, region);
464    validate_copy(q, dst_mem, dst_origin, dst_pitch,
465                  src_mem, src_origin, src_pitch, region);
466 
467    auto hev = create<hard_event>(
468       q, CL_COMMAND_COPY_BUFFER_RECT, deps,
469       soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
470                    &src_mem, src_origin, src_pitch,
471                    region));
472 
473    ret_object(rd_ev, hev);
474    return CL_SUCCESS;
475 
476 } catch (error &e) {
477    return e.get();
478 }
479 
480 CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_origin,const size_t * p_region,size_t row_pitch,size_t slice_pitch,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)481 clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
482                    const size_t *p_origin, const size_t *p_region,
483                    size_t row_pitch, size_t slice_pitch, void *ptr,
484                    cl_uint num_deps, const cl_event *d_deps,
485                    cl_event *rd_ev) try {
486    auto &q = obj(d_q);
487    auto &img = obj<image>(d_mem);
488    auto deps = objs<wait_list_tag>(d_deps, num_deps);
489    auto region = vector(p_region);
490    auto dst_pitch = pitch(region, {{ img.pixel_size(),
491                                      row_pitch, slice_pitch }});
492    auto src_origin = vector(p_origin);
493    auto src_pitch = pitch(region, {{ img.pixel_size(),
494                                      img.row_pitch(), img.slice_pitch() }});
495 
496    validate_common(q, deps);
497    validate_object(q, ptr, {}, dst_pitch, region);
498    validate_object(q, img, src_origin, region);
499    validate_object_access(img, CL_MEM_HOST_READ_ONLY);
500 
501    auto hev = create<hard_event>(
502       q, CL_COMMAND_READ_IMAGE, deps,
503       soft_copy_op(q, ptr, {}, dst_pitch,
504                    &img, src_origin, src_pitch,
505                    region));
506 
507    ret_object(rd_ev, hev);
508    return CL_SUCCESS;
509 
510 } catch (error &e) {
511    return e.get();
512 }
513 
514 CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,const size_t * p_origin,const size_t * p_region,size_t row_pitch,size_t slice_pitch,const void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)515 clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
516                     const size_t *p_origin, const size_t *p_region,
517                     size_t row_pitch, size_t slice_pitch, const void *ptr,
518                     cl_uint num_deps, const cl_event *d_deps,
519                     cl_event *rd_ev) try {
520    auto &q = obj(d_q);
521    auto &img = obj<image>(d_mem);
522    auto deps = objs<wait_list_tag>(d_deps, num_deps);
523    auto region = vector(p_region);
524    auto dst_origin = vector(p_origin);
525    auto dst_pitch = pitch(region, {{ img.pixel_size(),
526                                      img.row_pitch(), img.slice_pitch() }});
527    auto src_pitch = pitch(region, {{ img.pixel_size(),
528                                      row_pitch, slice_pitch }});
529 
530    validate_common(q, deps);
531    validate_object(q, img, dst_origin, region);
532    validate_object(q, ptr, {}, src_pitch, region);
533    validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);
534 
535    auto hev = create<hard_event>(
536       q, CL_COMMAND_WRITE_IMAGE, deps,
537       soft_copy_op(q, &img, dst_origin, dst_pitch,
538                    ptr, {}, src_pitch,
539                    region));
540 
541    ret_object(rd_ev, hev);
542    return CL_SUCCESS;
543 
544 } catch (error &e) {
545    return e.get();
546 }
547 
548 CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,const size_t * p_src_origin,const size_t * p_dst_origin,const size_t * p_region,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)549 clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
550                    const size_t *p_src_origin, const size_t *p_dst_origin,
551                    const size_t *p_region,
552                    cl_uint num_deps, const cl_event *d_deps,
553                    cl_event *rd_ev) try {
554    auto &q = obj(d_q);
555    auto &src_img = obj<image>(d_src_mem);
556    auto &dst_img = obj<image>(d_dst_mem);
557    auto deps = objs<wait_list_tag>(d_deps, num_deps);
558    auto region = vector(p_region);
559    auto dst_origin = vector(p_dst_origin);
560    auto src_origin = vector(p_src_origin);
561 
562    validate_common(q, deps);
563    validate_object(q, dst_img, dst_origin, region);
564    validate_object(q, src_img, src_origin, region);
565    validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);
566 
567    auto hev = create<hard_event>(
568       q, CL_COMMAND_COPY_IMAGE, deps,
569       hard_copy_op(q, &dst_img, dst_origin,
570                    &src_img, src_origin,
571                    region));
572 
573    ret_object(rd_ev, hev);
574    return CL_SUCCESS;
575 
576 } catch (error &e) {
577    return e.get();
578 }
579 
580 CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,const size_t * p_src_origin,const size_t * p_region,size_t dst_offset,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)581 clEnqueueCopyImageToBuffer(cl_command_queue d_q,
582                            cl_mem d_src_mem, cl_mem d_dst_mem,
583                            const size_t *p_src_origin, const size_t *p_region,
584                            size_t dst_offset,
585                            cl_uint num_deps, const cl_event *d_deps,
586                            cl_event *rd_ev) try {
587    auto &q = obj(d_q);
588    auto &src_img = obj<image>(d_src_mem);
589    auto &dst_mem = obj<buffer>(d_dst_mem);
590    auto deps = objs<wait_list_tag>(d_deps, num_deps);
591    auto region = vector(p_region);
592    vector_t dst_origin = { dst_offset };
593    auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
594    auto src_origin = vector(p_src_origin);
595    auto src_pitch = pitch(region, {{ src_img.pixel_size(),
596                                      src_img.row_pitch(),
597                                      src_img.slice_pitch() }});
598 
599    validate_common(q, deps);
600    validate_object(q, dst_mem, dst_origin, dst_pitch, region);
601    validate_object(q, src_img, src_origin, region);
602 
603    auto hev = create<hard_event>(
604       q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
605       soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
606                    &src_img, src_origin, src_pitch,
607                    region));
608 
609    ret_object(rd_ev, hev);
610    return CL_SUCCESS;
611 
612 } catch (error &e) {
613    return e.get();
614 }
615 
616 CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,cl_mem d_src_mem,cl_mem d_dst_mem,size_t src_offset,const size_t * p_dst_origin,const size_t * p_region,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)617 clEnqueueCopyBufferToImage(cl_command_queue d_q,
618                            cl_mem d_src_mem, cl_mem d_dst_mem,
619                            size_t src_offset,
620                            const size_t *p_dst_origin, const size_t *p_region,
621                            cl_uint num_deps, const cl_event *d_deps,
622                            cl_event *rd_ev) try {
623    auto &q = obj(d_q);
624    auto &src_mem = obj<buffer>(d_src_mem);
625    auto &dst_img = obj<image>(d_dst_mem);
626    auto deps = objs<wait_list_tag>(d_deps, num_deps);
627    auto region = vector(p_region);
628    auto dst_origin = vector(p_dst_origin);
629    auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
630                                      dst_img.row_pitch(),
631                                      dst_img.slice_pitch() }});
632    vector_t src_origin = { src_offset };
633    auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});
634 
635    validate_common(q, deps);
636    validate_object(q, dst_img, dst_origin, region);
637    validate_object(q, src_mem, src_origin, src_pitch, region);
638 
639    auto hev = create<hard_event>(
640       q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
641       soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
642                    &src_mem, src_origin, src_pitch,
643                    region));
644 
645    ret_object(rd_ev, hev);
646    return CL_SUCCESS;
647 
648 } catch (error &e) {
649    return e.get();
650 }
651 
652 CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,cl_map_flags flags,size_t offset,size_t size,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev,cl_int * r_errcode)653 clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
654                    cl_map_flags flags, size_t offset, size_t size,
655                    cl_uint num_deps, const cl_event *d_deps,
656                    cl_event *rd_ev, cl_int *r_errcode) try {
657    auto &q = obj(d_q);
658    auto &mem = obj<buffer>(d_mem);
659    auto deps = objs<wait_list_tag>(d_deps, num_deps);
660    vector_t region = { size, 1, 1 };
661    vector_t obj_origin = { offset };
662    auto obj_pitch = pitch(region, {{ 1 }});
663 
664    validate_common(q, deps);
665    validate_object(q, mem, obj_origin, obj_pitch, region);
666    validate_map_flags(mem, flags);
667 
668    void *map = mem.resource(q).add_map(q, flags, blocking, obj_origin, region);
669 
670    ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps));
671    ret_error(r_errcode, CL_SUCCESS);
672    return map;
673 
674 } catch (error &e) {
675    ret_error(r_errcode, e);
676    return NULL;
677 }
678 
679 CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q,cl_mem d_mem,cl_bool blocking,cl_map_flags flags,const size_t * p_origin,const size_t * p_region,size_t * row_pitch,size_t * slice_pitch,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev,cl_int * r_errcode)680 clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
681                   cl_map_flags flags,
682                   const size_t *p_origin, const size_t *p_region,
683                   size_t *row_pitch, size_t *slice_pitch,
684                   cl_uint num_deps, const cl_event *d_deps,
685                   cl_event *rd_ev, cl_int *r_errcode) try {
686    auto &q = obj(d_q);
687    auto &img = obj<image>(d_mem);
688    auto deps = objs<wait_list_tag>(d_deps, num_deps);
689    auto region = vector(p_region);
690    auto origin = vector(p_origin);
691 
692    validate_common(q, deps);
693    validate_object(q, img, origin, region);
694    validate_map_flags(img, flags);
695 
696    void *map = img.resource(q).add_map(q, flags, blocking, origin, region);
697 
698    ret_object(rd_ev, create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps));
699    ret_error(r_errcode, CL_SUCCESS);
700    return map;
701 
702 } catch (error &e) {
703    ret_error(r_errcode, e);
704    return NULL;
705 }
706 
707 CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q,cl_mem d_mem,void * ptr,cl_uint num_deps,const cl_event * d_deps,cl_event * rd_ev)708 clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
709                         cl_uint num_deps, const cl_event *d_deps,
710                         cl_event *rd_ev) try {
711    auto &q = obj(d_q);
712    auto &mem = obj(d_mem);
713    auto deps = objs<wait_list_tag>(d_deps, num_deps);
714 
715    validate_common(q, deps);
716 
717    auto hev = create<hard_event>(
718       q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
719       [=, &q, &mem](event &) {
720          mem.resource(q).del_map(ptr);
721       });
722 
723    ret_object(rd_ev, hev);
724    return CL_SUCCESS;
725 
726 } catch (error &e) {
727    return e.get();
728 }
729 
730 CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue command_queue,cl_uint num_mem_objects,const cl_mem * mem_objects,cl_mem_migration_flags flags,cl_uint num_events_in_wait_list,const cl_event * event_wait_list,cl_event * event)731 clEnqueueMigrateMemObjects(cl_command_queue command_queue,
732                            cl_uint num_mem_objects,
733                            const cl_mem *mem_objects,
734                            cl_mem_migration_flags flags,
735                            cl_uint num_events_in_wait_list,
736                            const cl_event *event_wait_list,
737                            cl_event *event) {
738    CLOVER_NOT_SUPPORTED_UNTIL("1.2");
739    return CL_INVALID_VALUE;
740 }
741