1 /*
2  * cl_kernel.cpp - CL kernel
3  *
4  *  Copyright (c) 2015 Intel Corporation
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  *
18  * Author: Wind Yuan <feng.yuan@intel.com>
19  */
20 
21 #include "cl_kernel.h"
22 #include "cl_context.h"
23 #include "cl_device.h"
24 #include "file_handle.h"
25 
26 #include <sys/stat.h>
27 
// Set to a non-zero value to compile the profiling section of
// CLKernel::execute (), which also calls finish () on the context after
// each kernel so the measured duration covers the whole execution.
#define ENABLE_DEBUG_KERNEL 0

// Local work size assigned to every dimension by set_default_work_size ().
#define XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE 0
31 
32 namespace XCam {
33 
// Built kernels shared between CLKernel instances; build_kernel () keys them
// by "<kernel name>#<body hash>#<build options>".
CLKernel::KernelMap CLKernel::_kernel_map;
// Held by build_kernel () while it looks up or inserts into _kernel_map.
Mutex CLKernel::_kernel_map_mutex;
36 
37 static char*
default_cache_path()38 default_cache_path () {
39     static char path[XCAM_MAX_STR_SIZE] = {0};
40     snprintf (
41         path, XCAM_MAX_STR_SIZE - 1,
42         "%s/%s", std::getenv ("HOME"), ".xcam/");
43 
44     return path;
45 }
46 
47 const char* CLKernel::_kernel_cache_path = default_cache_path ();
48 
CLKernel(const SmartPtr<CLContext> & context,const char * name)49 CLKernel::CLKernel (const SmartPtr<CLContext> &context, const char *name)
50     : _name (NULL)
51     , _kernel_id (NULL)
52     , _context (context)
53 {
54     XCAM_ASSERT (context.ptr ());
55     //XCAM_ASSERT (name);
56 
57     if (name)
58         _name = strndup (name, XCAM_MAX_STR_SIZE);
59 
60     set_default_work_size ();
61 
62     XCAM_OBJ_PROFILING_INIT;
63 }
64 
~CLKernel()65 CLKernel::~CLKernel ()
66 {
67     destroy ();
68     if (_name)
69         xcam_free (_name);
70 }
71 
72 void
destroy()73 CLKernel::destroy ()
74 {
75     if (!_parent_kernel.ptr ())
76         _context->destroy_kernel_id (_kernel_id);
77 }
78 
/*
 * Fold a string into an 8-byte key: byte k of the input is XOR-ed into
 * key_id[k % 8].
 *
 * str:    bytes to fold; a NULL pointer yields an all-zero key.
 * len:    number of bytes to fold; 0 means "use strlen (str)".
 * key_id: receives the 8-byte result (previous content is overwritten).
 *
 * The fold is done byte by byte instead of through a uint32_t* view of the
 * string: that avoids unaligned and aliasing-violating loads while yielding
 * exactly the same key, since a word-wise XOR is a byte-wise XOR.
 */
static void
get_string_key_id (const char *str, uint32_t len, uint8_t key_id[8])
{
    uint8_t folded[8] = {0};

    if (str != NULL) {
        if (!len)
            len = strlen (str);

        for (uint32_t i = 0; i < len; ++i) {
            folded[i % 8] ^= (uint8_t)str[i];
        }
    }

    memcpy (key_id, folded, sizeof (folded));
}
104 
105 XCamReturn
build_kernel(const XCamKernelInfo & info,const char * options)106 CLKernel::build_kernel (const XCamKernelInfo& info, const char* options)
107 {
108     KernelMap::iterator i_kernel;
109     SmartPtr<CLKernel> single_kernel;
110     char key_str[1024];
111     uint8_t body_key[8];
112     std::string key;
113     XCamReturn ret = XCAM_RETURN_NO_ERROR;
114 
115     XCAM_FAIL_RETURN (ERROR, info.kernel_name, XCAM_RETURN_ERROR_PARAM, "build kernel failed since kernel name null");
116 
117     xcam_mem_clear (body_key);
118     get_string_key_id (info.kernel_body, info.kernel_body_len, body_key);
119     snprintf (
120         key_str, sizeof(key_str),
121         "%s#%02x%02x%02x%02x%02x%02x%02x%02x#%s",
122         info.kernel_name,
123         body_key[0], body_key[1], body_key[2], body_key[3], body_key[4], body_key[5], body_key[6], body_key[7],
124         XCAM_STR(options));
125     key = key_str;
126 
127     char temp_filename[XCAM_MAX_STR_SIZE] = {0};
128     char cache_filename[XCAM_MAX_STR_SIZE] = {0};
129     FileHandle temp_file;
130     FileHandle cache_file;
131     size_t read_cache_size = 0;
132     size_t write_cache_size = 0;
133     uint8_t *kernel_cache = NULL;
134     bool load_cache = false;
135     struct timeval ts;
136 
137     const char* cache_path = std::getenv ("XCAM_CL_KERNEL_CACHE_PATH");
138     if (NULL == cache_path) {
139         cache_path = _kernel_cache_path;
140     }
141 
142     snprintf (
143         cache_filename, XCAM_MAX_STR_SIZE - 1,
144         "%s/%s",
145         cache_path, key_str);
146 
147     {
148         SmartLock locker (_kernel_map_mutex);
149 
150         i_kernel = _kernel_map.find (key);
151         if (i_kernel == _kernel_map.end ()) {
152             SmartPtr<CLContext>  context = get_context ();
153             single_kernel = new CLKernel (context, info.kernel_name);
154             XCAM_ASSERT (single_kernel.ptr ());
155 
156             if (access (cache_path, F_OK) == -1) {
157                 mkdir (cache_path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH);
158             }
159 
160             ret = cache_file.open (cache_filename, "r");
161             if (ret == XCAM_RETURN_NO_ERROR) {
162                 cache_file.get_file_size (read_cache_size);
163                 if (read_cache_size > 0) {
164                     kernel_cache = (uint8_t*) xcam_malloc0 (sizeof (uint8_t) * (read_cache_size + 1));
165                     if (NULL != kernel_cache) {
166                         cache_file.read_file (kernel_cache, read_cache_size);
167                         cache_file.close ();
168 
169                         ret = single_kernel->load_from_binary (kernel_cache, read_cache_size);
170                         xcam_free (kernel_cache);
171                         kernel_cache = NULL;
172 
173                         XCAM_FAIL_RETURN (
174                             ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
175                             "build kernel(%s) from binary failed", key_str);
176 
177                         load_cache = true;
178                     }
179                 }
180             } else {
181                 XCAM_LOG_DEBUG ("open kernel cache file to read failed ret(%d)", ret);
182             }
183 
184             if (load_cache == false) {
185                 ret = single_kernel->load_from_source (info.kernel_body, strlen (info.kernel_body), &kernel_cache, &write_cache_size, options);
186                 XCAM_FAIL_RETURN (
187                     ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
188                     "build kernel(%s) from source failed", key_str);
189             }
190 
191             _kernel_map.insert (std::make_pair (key, single_kernel));
192             //_kernel_map[key] = single_kernel;
193         } else {
194             single_kernel = i_kernel->second;
195         }
196     }
197 
198     if (load_cache == false && NULL != kernel_cache) {
199         gettimeofday (&ts, NULL);
200         snprintf (
201             temp_filename, XCAM_MAX_STR_SIZE - 1,
202             "%s." XCAM_TIMESTAMP_FORMAT,
203             cache_filename, XCAM_TIMESTAMP_ARGS (XCAM_TIMEVAL_2_USEC (ts)));
204 
205         ret = temp_file.open (temp_filename, "wb");
206         if (ret == XCAM_RETURN_NO_ERROR) {
207             ret = temp_file.write_file (kernel_cache, write_cache_size);
208             temp_file.close ();
209             if (ret == XCAM_RETURN_NO_ERROR && write_cache_size > 0) {
210                 rename (temp_filename, cache_filename);
211             } else {
212                 remove (temp_filename);
213             }
214         } else {
215             XCAM_LOG_ERROR ("open kernel cache file to write failed ret(%d)", ret);
216         }
217         xcam_free (kernel_cache);
218         kernel_cache = NULL;
219     }
220 
221     XCAM_FAIL_RETURN (
222         ERROR, (single_kernel.ptr () && single_kernel->is_valid ()), XCAM_RETURN_ERROR_UNKNOWN,
223         "build kernel(%s) failed, unknown error", key_str);
224 
225     ret = this->clone (single_kernel);
226     XCAM_FAIL_RETURN (
227         ERROR, ret == XCAM_RETURN_NO_ERROR, ret,
228         "load kernel(%s) from kernel failed", key_str);
229     return ret;
230 }
231 
232 XCamReturn
load_from_source(const char * source,size_t length,uint8_t ** gen_binary,size_t * binary_size,const char * build_option)233 CLKernel::load_from_source (
234     const char *source, size_t length,
235     uint8_t **gen_binary, size_t *binary_size,
236     const char *build_option)
237 {
238     cl_kernel new_kernel_id = NULL;
239 
240     XCAM_ASSERT (source);
241     if (!source) {
242         XCAM_LOG_WARNING ("kernel:%s source empty", XCAM_STR (_name));
243         return XCAM_RETURN_ERROR_PARAM;
244     }
245 
246     if (_kernel_id) {
247         XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
248         return XCAM_RETURN_ERROR_PARAM;
249     }
250 
251     XCAM_ASSERT (_context.ptr ());
252 
253     if (length == 0)
254         length = strlen (source);
255 
256     new_kernel_id =
257         _context->generate_kernel_id (
258             this,
259             (const uint8_t *)source, length,
260             CLContext::KERNEL_BUILD_SOURCE,
261             gen_binary, binary_size,
262             build_option);
263     XCAM_FAIL_RETURN(
264         WARNING,
265         new_kernel_id != NULL,
266         XCAM_RETURN_ERROR_CL,
267         "cl kernel(%s) load from source failed", XCAM_STR (_name));
268 
269     _kernel_id = new_kernel_id;
270     return XCAM_RETURN_NO_ERROR;
271 }
272 
273 XCamReturn
load_from_binary(const uint8_t * binary,size_t length)274 CLKernel::load_from_binary (const uint8_t *binary, size_t length)
275 {
276     cl_kernel new_kernel_id = NULL;
277 
278     XCAM_ASSERT (binary);
279     if (!binary || !length) {
280         XCAM_LOG_WARNING ("kernel:%s binary empty", XCAM_STR (_name));
281         return XCAM_RETURN_ERROR_PARAM;
282     }
283 
284     if (_kernel_id) {
285         XCAM_LOG_WARNING ("kernel:%s already build yet", XCAM_STR (_name));
286         return XCAM_RETURN_ERROR_PARAM;
287     }
288 
289     XCAM_ASSERT (_context.ptr ());
290 
291     new_kernel_id =
292         _context->generate_kernel_id (
293             this,
294             binary, length,
295             CLContext::KERNEL_BUILD_BINARY,
296             NULL, NULL,
297             NULL);
298     XCAM_FAIL_RETURN(
299         WARNING,
300         new_kernel_id != NULL,
301         XCAM_RETURN_ERROR_CL,
302         "cl kernel(%s) load from binary failed", XCAM_STR (_name));
303 
304     _kernel_id = new_kernel_id;
305     return XCAM_RETURN_NO_ERROR;
306 }
307 
308 XCamReturn
clone(SmartPtr<CLKernel> kernel)309 CLKernel::clone (SmartPtr<CLKernel> kernel)
310 {
311     XCAM_FAIL_RETURN (
312         WARNING,
313         kernel.ptr () && kernel->is_valid (),
314         XCAM_RETURN_ERROR_CL,
315         "cl kernel(%s) load from kernel failed", XCAM_STR (_name));
316     _kernel_id = kernel->get_kernel_id ();
317     _parent_kernel = kernel;
318     if (!_name && kernel->get_kernel_name ()) {
319         _name = strndup (kernel->get_kernel_name (), XCAM_MAX_STR_SIZE);
320     }
321     return XCAM_RETURN_NO_ERROR;
322 }
323 
324 XCamReturn
set_arguments(const CLArgList & args,const CLWorkSize & work_size)325 CLKernel::set_arguments (const CLArgList &args, const CLWorkSize &work_size)
326 {
327     XCamReturn ret = XCAM_RETURN_NO_ERROR;
328     uint32_t i_count = 0;
329 
330     XCAM_FAIL_RETURN (
331         ERROR, _arg_list.empty (), XCAM_RETURN_ERROR_PARAM,
332         "cl image kernel(%s) arguments was already set, can NOT be set twice", get_kernel_name ());
333 
334     for (CLArgList::const_iterator iter = args.begin (); iter != args.end (); ++iter, ++i_count) {
335         const SmartPtr<CLArgument> &arg = *iter;
336         XCAM_FAIL_RETURN (
337             WARNING, arg.ptr (),
338             XCAM_RETURN_ERROR_PARAM, "cl image kernel(%s) argc(%d) is NULL", get_kernel_name (), i_count);
339 
340         void *adress = NULL;
341         uint32_t size = 0;
342         arg->get_value (adress, size);
343         ret = set_argument (i_count, adress, size);
344         XCAM_FAIL_RETURN (
345             WARNING, ret == XCAM_RETURN_NO_ERROR,
346             ret, "cl image kernel(%s) set argc(%d) failed", get_kernel_name (), i_count);
347     }
348 
349     ret = set_work_size (work_size);
350     XCAM_FAIL_RETURN (
351         WARNING, ret == XCAM_RETURN_NO_ERROR, ret,
352         "cl image kernel(%s) set worksize(global:%dx%dx%d, local:%dx%dx%d) failed",
353         XCAM_STR(get_kernel_name ()),
354         (int)work_size.global[0], (int)work_size.global[1], (int)work_size.global[2],
355         (int)work_size.local[0], (int)work_size.local[1], (int)work_size.local[2]);
356 
357     _arg_list = args;
358     return ret;
359 }
360 
361 XCamReturn
set_argument(uint32_t arg_i,void * arg_addr,uint32_t arg_size)362 CLKernel::set_argument (uint32_t arg_i, void *arg_addr, uint32_t arg_size)
363 {
364     cl_int error_code = clSetKernelArg (_kernel_id, arg_i, arg_size, arg_addr);
365     if (error_code != CL_SUCCESS) {
366         XCAM_LOG_DEBUG ("kernel(%s) set arg_i(%d) failed", _name, arg_i);
367         return XCAM_RETURN_ERROR_CL;
368     }
369     return XCAM_RETURN_NO_ERROR;
370 }
371 
372 XCamReturn
set_work_size(const CLWorkSize & work_size)373 CLKernel::set_work_size (const CLWorkSize &work_size)
374 {
375     uint32_t i = 0;
376     uint32_t work_group_size = 1;
377     const CLDevieInfo &dev_info = CLDevice::instance ()->get_device_info ();
378 
379     XCAM_FAIL_RETURN (
380         WARNING,
381         work_size.dim <= dev_info.max_work_item_dims,
382         XCAM_RETURN_ERROR_PARAM,
383         "kernel(%s) work dims(%d) greater than device max dims(%d)",
384         _name, work_size.dim, dev_info.max_work_item_dims);
385 
386     for (i = 0; i < work_size.dim; ++i) {
387         work_group_size *= work_size.local [i];
388 
389         XCAM_FAIL_RETURN (
390             WARNING,
391             work_size.local [i] <= dev_info.max_work_item_sizes [i],
392             XCAM_RETURN_ERROR_PARAM,
393             "kernel(%s) work item(%d) size:%d is greater than device max work item size(%d)",
394             _name, i, (uint32_t)work_size.local [i], (uint32_t)dev_info.max_work_item_sizes [i]);
395     }
396 
397     XCAM_FAIL_RETURN (
398         WARNING,
399         work_group_size == 0 || work_group_size <= dev_info.max_work_group_size,
400         XCAM_RETURN_ERROR_PARAM,
401         "kernel(%s) work-group-size:%d is greater than device max work-group-size(%d)",
402         _name, work_group_size, (uint32_t)dev_info.max_work_group_size);
403 
404     _work_size = work_size;
405 
406     return XCAM_RETURN_NO_ERROR;
407 }
408 
409 void
set_default_work_size()410 CLKernel::set_default_work_size ()
411 {
412     _work_size.dim = XCAM_DEFAULT_IMAGE_DIM;
413     for (uint32_t i = 0; i < _work_size.dim; ++i) {
414         //_global_work_size [i] = XCAM_CL_KERNEL_DEFAULT_GLOBAL_WORK_SIZE;
415         _work_size.local [i] = XCAM_CL_KERNEL_DEFAULT_LOCAL_WORK_SIZE;
416     }
417 }
418 
419 struct KernelUserData {
420     SmartPtr<CLKernel>  kernel;
421     SmartPtr<CLEvent>   event;
422     CLArgList           arg_list;
423 
KernelUserDataXCam::KernelUserData424     KernelUserData (const SmartPtr<CLKernel> &k, SmartPtr<CLEvent> &e)
425         : kernel (k)
426         , event (e)
427     {}
428 };
429 
430 void
event_notify(cl_event event,cl_int status,void * data)431 CLKernel::event_notify (cl_event event, cl_int status, void* data)
432 {
433     KernelUserData *kernel_data = (KernelUserData *)data;
434     XCAM_ASSERT (event == kernel_data->event->get_event_id ());
435     XCAM_UNUSED (status);
436     XCAM_UNUSED (event);
437 
438     delete kernel_data;
439 }
440 
441 XCamReturn
execute(const SmartPtr<CLKernel> self,bool block,CLEventList & events,SmartPtr<CLEvent> & event_out)442 CLKernel::execute (
443     const SmartPtr<CLKernel> self,
444     bool block,
445     CLEventList &events,
446     SmartPtr<CLEvent> &event_out)
447 {
448     XCAM_ASSERT (self.ptr () == this);
449     XCAM_ASSERT (_context.ptr ());
450     SmartPtr<CLEvent> kernel_event = event_out;
451 
452     if (!block && !kernel_event.ptr ()) {
453         kernel_event = new CLEvent;
454     }
455 
456 #if ENABLE_DEBUG_KERNEL
457     XCAM_OBJ_PROFILING_START;
458 #endif
459 
460     XCamReturn ret = _context->execute_kernel (self, NULL, events, kernel_event);
461 
462     XCAM_FAIL_RETURN (
463         ERROR,
464         ret == XCAM_RETURN_NO_ERROR,
465         ret,
466         "kernel(%s) execute failed", XCAM_STR(_name));
467 
468 
469     if (block) {
470         _context->finish ();
471     } else {
472         XCAM_ASSERT (kernel_event.ptr () && kernel_event->get_event_id ());
473         KernelUserData *user_data = new KernelUserData (self, kernel_event);
474         user_data->arg_list.swap (_arg_list);
475         ret = _context->set_event_callback (kernel_event, CL_COMPLETE, event_notify, user_data);
476         if (ret != XCAM_RETURN_NO_ERROR) {
477             XCAM_LOG_WARNING ("kernel(%s) set event callback failed", XCAM_STR (_name));
478             _context->finish ();
479             delete user_data;
480         }
481     }
482     _arg_list.clear ();
483 
484 #if ENABLE_DEBUG_KERNEL
485     _context->finish ();
486     char name[1024];
487     snprintf (name, 1024, "%s-%p", XCAM_STR (_name), this);
488     XCAM_OBJ_PROFILING_END (name, XCAM_OBJ_DUR_FRAME_NUM);
489 #endif
490     return ret;
491 }
492 
493 };
494