1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7 %    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8 %    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9 %    A   A   C       C      E      L      E      R R    A   A    T    E       %
10 %    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11 %                                                                             %
12 %                                                                             %
13 %                       MagickCore Acceleration Methods                       %
14 %                                                                             %
15 %                              Software Design                                %
16 %                                  Cristy                                     %
17 %                               SiuChi Chan                                   %
18 %                              Guansong Zhang                                 %
19 %                               January 2010                                  %
20 %                               Dirk Lemstra                                  %
21 %                                April 2016                                   %
22 %                                                                             %
23 %                                                                             %
24 %  Copyright 1999-2021 ImageMagick Studio LLC, a non-profit organization      %
25 %  dedicated to making software imaging solutions freely available.           %
26 %                                                                             %
27 %  You may not use this file except in compliance with the License.  You may  %
28 %  obtain a copy of the License at                                            %
29 %                                                                             %
30 %    https://imagemagick.org/script/license.php                               %
31 %                                                                             %
32 %  Unless required by applicable law or agreed to in writing, software        %
33 %  distributed under the License is distributed on an "AS IS" BASIS,          %
34 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
35 %  See the License for the specific language governing permissions and        %
36 %  limitations under the License.                                             %
37 %                                                                             %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40 
41 /*
42 Include declarations.
43 */
44 #include "MagickCore/studio.h"
45 #include "MagickCore/accelerate-private.h"
46 #include "MagickCore/accelerate-kernels-private.h"
47 #include "MagickCore/artifact.h"
48 #include "MagickCore/cache.h"
49 #include "MagickCore/cache-private.h"
50 #include "MagickCore/cache-view.h"
51 #include "MagickCore/color-private.h"
52 #include "MagickCore/delegate-private.h"
53 #include "MagickCore/enhance.h"
54 #include "MagickCore/exception.h"
55 #include "MagickCore/exception-private.h"
56 #include "MagickCore/gem.h"
57 #include "MagickCore/image.h"
58 #include "MagickCore/image-private.h"
59 #include "MagickCore/linked-list.h"
60 #include "MagickCore/list.h"
61 #include "MagickCore/memory_.h"
62 #include "MagickCore/monitor-private.h"
63 #include "MagickCore/opencl.h"
64 #include "MagickCore/opencl-private.h"
65 #include "MagickCore/option.h"
66 #include "MagickCore/pixel-accessor.h"
67 #include "MagickCore/pixel-private.h"
68 #include "MagickCore/prepress.h"
69 #include "MagickCore/quantize.h"
70 #include "MagickCore/quantum-private.h"
71 #include "MagickCore/random_.h"
72 #include "MagickCore/random-private.h"
73 #include "MagickCore/registry.h"
74 #include "MagickCore/resize.h"
75 #include "MagickCore/resize-private.h"
76 #include "MagickCore/semaphore.h"
77 #include "MagickCore/splay-tree.h"
78 #include "MagickCore/statistic.h"
79 #include "MagickCore/string_.h"
80 #include "MagickCore/string-private.h"
81 #include "MagickCore/token.h"
82 
83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85 
86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
87 
88 /*
89   Define declarations.
90 */
91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92 
93 /*
94   Static declarations.
95 */
96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97 {
98   BoxWeightingFunction,
99   TriangleWeightingFunction,
100   HannWeightingFunction,
101   HammingWeightingFunction,
102   BlackmanWeightingFunction,
103   CubicBCWeightingFunction,
104   SincWeightingFunction,
105   SincFastWeightingFunction,
106   LastWeightingFunction
107 };
108 
109 /*
110   Helper functions.
111 */
checkAccelerateCondition(const Image * image)112 static MagickBooleanType checkAccelerateCondition(const Image* image)
113 {
114   /* only direct class images are supported */
115   if (image->storage_class != DirectClass)
116     return(MagickFalse);
117 
118   /* check if the image's colorspace is supported */
119   if (image->colorspace != RGBColorspace &&
120       image->colorspace != sRGBColorspace &&
121       image->colorspace != LinearGRAYColorspace &&
122       image->colorspace != GRAYColorspace)
123     return(MagickFalse);
124 
125   /* check if the virtual pixel method is compatible with the OpenCL implementation */
126   if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
127       (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
128     return(MagickFalse);
129 
130   /* check if the image has mask */
131   if (((image->channels & ReadMaskChannel) != 0) ||
132       ((image->channels & WriteMaskChannel) != 0) ||
133       ((image->channels & CompositeMaskChannel) != 0))
134     return(MagickFalse);
135 
136   if (image->number_channels > 4)
137     return(MagickFalse);
138 
139   /* check if pixel order is R */
140   if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
141     return(MagickFalse);
142 
143   if (image->number_channels == 1)
144     return(MagickTrue);
145 
146   /* check if pixel order is RA */
147   if ((image->number_channels == 2) &&
148       (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
149     return(MagickTrue);
150 
151   if (image->number_channels == 2)
152     return(MagickFalse);
153 
154   /* check if pixel order is RGB */
155   if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
156       (GetPixelChannelOffset(image,BluePixelChannel) != 2))
157     return(MagickFalse);
158 
159   if (image->number_channels == 3)
160     return(MagickTrue);
161 
162   /* check if pixel order is RGBA */
163   if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
164     return(MagickFalse);
165 
166   return(MagickTrue);
167 }
168 
checkAccelerateConditionRGBA(const Image * image)169 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
170 {
171   if (checkAccelerateCondition(image) == MagickFalse)
172     return(MagickFalse);
173 
174   /* the order will be RGBA if the image has 4 channels */
175   if (image->number_channels != 4)
176     return(MagickFalse);
177 
178   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
179       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
180       (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
181       (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
182     return(MagickFalse);
183 
184   return(MagickTrue);
185 }
186 
checkPixelIntensity(const Image * image,const PixelIntensityMethod method)187 static MagickBooleanType checkPixelIntensity(const Image *image,
188   const PixelIntensityMethod method)
189 {
190   /* EncodePixelGamma and DecodePixelGamma are not supported */
191   if ((method == Rec601LumaPixelIntensityMethod) ||
192       (method == Rec709LumaPixelIntensityMethod))
193     {
194       if (image->colorspace == RGBColorspace)
195         return(MagickFalse);
196     }
197 
198   if ((method == Rec601LuminancePixelIntensityMethod) ||
199       (method == Rec709LuminancePixelIntensityMethod))
200     {
201       if (image->colorspace == sRGBColorspace)
202         return(MagickFalse);
203     }
204 
205   return(MagickTrue);
206 }
207 
checkHistogramCondition(const Image * image,const PixelIntensityMethod method)208 static MagickBooleanType checkHistogramCondition(const Image *image,
209   const PixelIntensityMethod method)
210 {
211   /* ensure this is the only pass get in for now. */
212   if ((image->channel_mask & SyncChannels) == 0)
213     return MagickFalse;
214 
215   return(checkPixelIntensity(image,method));
216 }
217 
getOpenCLEnvironment(ExceptionInfo * exception)218 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
219 {
220   MagickCLEnv
221     clEnv;
222 
223   clEnv=GetCurrentOpenCLEnv();
224   if (clEnv == (MagickCLEnv) NULL)
225     return((MagickCLEnv) NULL);
226 
227   if (clEnv->enabled == MagickFalse)
228     return((MagickCLEnv) NULL);
229 
230   if (InitializeOpenCL(clEnv,exception) == MagickFalse)
231     return((MagickCLEnv) NULL);
232 
233   return(clEnv);
234 }
235 
cloneImage(const Image * image,ExceptionInfo * exception)236 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
237 {
238   Image
239     *clone;
240 
241   if (((image->channel_mask & RedChannel) != 0) &&
242       ((image->channel_mask & GreenChannel) != 0) &&
243       ((image->channel_mask & BlueChannel) != 0) &&
244       ((image->channel_mask & AlphaChannel) != 0))
245     clone=CloneImage(image,0,0,MagickTrue,exception);
246   else
247     {
248       clone=CloneImage(image,0,0,MagickTrue,exception);
249       if (clone != (Image *) NULL)
250         SyncImagePixelCache(clone,exception);
251     }
252   return(clone);
253 }
254 
255 /* pad the global workgroup size to the next multiple of
256    the local workgroup size */
padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize,const unsigned int localGroupSize)257 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
258   const unsigned int orgGlobalSize,const unsigned int localGroupSize)
259 {
260   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
261 }
262 
createKernelInfo(MagickCLDevice device,const double radius,const double sigma,cl_uint * width,ExceptionInfo * exception)263 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
264   const double sigma,cl_uint *width,ExceptionInfo *exception)
265 {
266   char
267     geometry[MagickPathExtent];
268 
269   cl_mem
270     imageKernelBuffer;
271 
272   float
273     *kernelBufferPtr;
274 
275   KernelInfo
276     *kernel;
277 
278   ssize_t
279     i;
280 
281   (void) FormatLocaleString(geometry,MagickPathExtent,
282     "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
283   kernel=AcquireKernelInfo(geometry,exception);
284   if (kernel == (KernelInfo *) NULL)
285   {
286     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
287       ResourceLimitWarning,"AcquireKernelInfo failed.",".");
288     return((cl_mem) NULL);
289   }
290   kernelBufferPtr=(float *) AcquireMagickMemory(kernel->width*
291     sizeof(*kernelBufferPtr));
292   if (kernelBufferPtr == (float *) NULL)
293     {
294       kernel=DestroyKernelInfo(kernel);
295       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
296         ResourceLimitWarning,"MemoryAllocationFailed.",".");
297       return((cl_mem) NULL);
298     }
299   for (i = 0; i < (ssize_t) kernel->width; i++)
300     kernelBufferPtr[i]=(float) kernel->values[i];
301   imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
302     CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
303   *width=(cl_uint) kernel->width;
304   kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
305   kernel=DestroyKernelInfo(kernel);
306   if (imageKernelBuffer == (cl_mem) NULL)
307     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
308       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
309   return(imageKernelBuffer);
310 }
311 
LaunchHistogramKernel(MagickCLEnv clEnv,MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,Image * image,const ChannelType channel,ExceptionInfo * exception)312 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
313   MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
314   cl_mem histogramBuffer,Image *image,const ChannelType channel,
315   ExceptionInfo *exception)
316 {
317   MagickBooleanType
318     outputReady;
319 
320   cl_int
321     clStatus;
322 
323   cl_kernel
324     histogramKernel;
325 
326   cl_event
327     event;
328 
329   cl_uint
330     colorspace,
331     method;
332 
333   ssize_t
334     i;
335 
336   size_t
337     global_work_size[2];
338 
339   histogramKernel = NULL;
340 
341   outputReady = MagickFalse;
342   colorspace = image->colorspace;
343   method = image->intensity;
344 
345   /* get the OpenCL kernel */
346   histogramKernel = AcquireOpenCLKernel(device,"Histogram");
347   if (histogramKernel == NULL)
348   {
349     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
350     goto cleanup;
351   }
352 
353   /* set the kernel arguments */
354   i = 0;
355   clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
356   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
357   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
358   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
359   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
360   if (clStatus != CL_SUCCESS)
361   {
362     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
363     goto cleanup;
364   }
365 
366   /* launch the kernel */
367   global_work_size[0] = image->columns;
368   global_work_size[1] = image->rows;
369 
370   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
371 
372   if (clStatus != CL_SUCCESS)
373   {
374     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
375     goto cleanup;
376   }
377   RecordProfileData(device,histogramKernel,event);
378 
379   outputReady = MagickTrue;
380 
381 cleanup:
382 
383   if (histogramKernel!=NULL)
384     ReleaseOpenCLKernel(histogramKernel);
385 
386   return(outputReady);
387 }
388 
389 /*
390 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
391 %                                                                             %
392 %                                                                             %
393 %                                                                             %
394 %     A c c e l e r a t e A d d N o i s e I m a g e                           %
395 %                                                                             %
396 %                                                                             %
397 %                                                                             %
398 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
399 */
400 
ComputeAddNoiseImage(const Image * image,MagickCLEnv clEnv,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)401 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
402   const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
403 {
404   cl_command_queue
405     queue;
406 
407   cl_float
408     cl_attenuate;
409 
410   cl_int
411     status;
412 
413   cl_kernel
414     addNoiseKernel;
415 
416   cl_mem
417     filteredImageBuffer,
418     imageBuffer;
419 
420   cl_uint
421     bufferLength,
422     inputPixelCount,
423     number_channels,
424     numRandomNumberPerPixel,
425     pixelsPerWorkitem,
426     seed0,
427     seed1,
428     workItemCount;
429 
430   const unsigned long
431     *s;
432 
433   MagickBooleanType
434     outputReady;
435 
436   MagickCLDevice
437     device;
438 
439   Image
440     *filteredImage;
441 
442   RandomInfo
443     *randomInfo;
444 
445   size_t
446     gsize[1],
447     i,
448     lsize[1],
449     numRandPerChannel;
450 
451   filteredImage=NULL;
452   imageBuffer=NULL;
453   filteredImageBuffer=NULL;
454   addNoiseKernel=NULL;
455   outputReady=MagickFalse;
456 
457   device=RequestOpenCLDevice(clEnv);
458   queue=AcquireOpenCLCommandQueue(device);
459   if (queue == (cl_command_queue) NULL)
460     goto cleanup;
461   filteredImage=cloneImage(image,exception);
462   if (filteredImage == (Image *) NULL)
463     goto cleanup;
464   if (filteredImage->number_channels != image->number_channels)
465     goto cleanup;
466   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
467   if (imageBuffer == (cl_mem) NULL)
468     goto cleanup;
469   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
470   if (filteredImageBuffer == (cl_mem) NULL)
471     goto cleanup;
472 
473   /* find out how many random numbers needed by pixel */
474   numRandPerChannel=0;
475   numRandomNumberPerPixel=0;
476   switch (noise_type)
477   {
478     case UniformNoise:
479     case ImpulseNoise:
480     case LaplacianNoise:
481     case RandomNoise:
482     default:
483       numRandPerChannel=1;
484       break;
485     case GaussianNoise:
486     case MultiplicativeGaussianNoise:
487     case PoissonNoise:
488       numRandPerChannel=2;
489       break;
490   };
491   if (GetPixelRedTraits(image) != UndefinedPixelTrait)
492     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
493   if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
494     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
495   if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
496     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
497   if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
498     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
499 
500   addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
501   if (addNoiseKernel == (cl_kernel) NULL)
502   {
503     (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
504       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
505     goto cleanup;
506   }
507 
508   /* 256 work items per group, 2 groups per CU */
509   workItemCount=device->max_compute_units*2*256;
510   inputPixelCount=(cl_int) (image->columns*image->rows);
511   pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
512   pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
513   lsize[0]=256;
514   gsize[0]=workItemCount;
515 
516   randomInfo=AcquireRandomInfo();
517   s=GetRandomInfoSeed(randomInfo);
518   seed0=s[0];
519   (void) GetPseudoRandomValue(randomInfo);
520   seed1=s[0];
521   randomInfo=DestroyRandomInfo(randomInfo);
522 
523   number_channels=(cl_uint) image->number_channels;
524   bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
525   cl_attenuate=(cl_float) attenuate;
526 
527   i=0;
528   status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
529   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
530   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
531   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
532   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
533   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
534   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&cl_attenuate);
535   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
536   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
537   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
538   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
539   if (status != CL_SUCCESS)
540   {
541     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
542       ResourceLimitWarning,"clSetKernelArg failed.",".");
543     goto cleanup;
544   }
545 
546   outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
547     lsize,image,filteredImage,MagickFalse,exception);
548 
549 cleanup:
550 
551   if (imageBuffer != (cl_mem) NULL)
552     ReleaseOpenCLMemObject(imageBuffer);
553   if (filteredImageBuffer != (cl_mem) NULL)
554     ReleaseOpenCLMemObject(filteredImageBuffer);
555   if (addNoiseKernel != (cl_kernel) NULL)
556     ReleaseOpenCLKernel(addNoiseKernel);
557   if (queue != (cl_command_queue) NULL)
558     ReleaseOpenCLCommandQueue(device,queue);
559   if (device != (MagickCLDevice) NULL)
560     ReleaseOpenCLDevice(device);
561   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
562     filteredImage=DestroyImage(filteredImage);
563 
564   return(filteredImage);
565 }
566 
AccelerateAddNoiseImage(const Image * image,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)567 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
568   const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
569 {
570   /* Temporary disabled because of repetition.
571 
572   Image
573     *filteredImage;
574 
575   MagickCLEnv
576     clEnv;
577 
578   assert(image != NULL);
579   assert(exception != (ExceptionInfo *) NULL);
580 
581   if (checkAccelerateCondition(image) == MagickFalse)
582     return((Image *) NULL);
583 
584   clEnv=getOpenCLEnvironment(exception);
585   if (clEnv == (MagickCLEnv) NULL)
586     return((Image *) NULL);
587 
588   filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,attenuate,
589     exception);
590   return(filteredImage);
591   */
592   magick_unreferenced(image);
593   magick_unreferenced(noise_type);
594   magick_unreferenced(attenuate);
595   magick_unreferenced(exception);
596   return((Image *)NULL);
597 }
598 
599 /*
600 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
601 %                                                                             %
602 %                                                                             %
603 %                                                                             %
604 %     A c c e l e r a t e B l u r I m a g e                                   %
605 %                                                                             %
606 %                                                                             %
607 %                                                                             %
608 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
609 */
610 
ComputeBlurImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,ExceptionInfo * exception)611 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
612   const double radius,const double sigma,ExceptionInfo *exception)
613 {
614   cl_command_queue
615     queue;
616 
617   cl_int
618     status;
619 
620   cl_kernel
621     blurColumnKernel,
622     blurRowKernel;
623 
624   cl_mem
625     filteredImageBuffer,
626     imageBuffer,
627     imageKernelBuffer,
628     tempImageBuffer;
629 
630   cl_uint
631     imageColumns,
632     imageRows,
633     kernelWidth,
634     number_channels;
635 
636   Image
637     *filteredImage;
638 
639   MagickBooleanType
640     outputReady;
641 
642   MagickCLDevice
643     device;
644 
645   MagickSizeType
646     length;
647 
648   size_t
649     chunkSize=256,
650     gsize[2],
651     i,
652     lsize[2];
653 
654   filteredImage=NULL;
655   imageBuffer=NULL;
656   filteredImageBuffer=NULL;
657   tempImageBuffer=NULL;
658   imageKernelBuffer=NULL;
659   blurRowKernel=NULL;
660   blurColumnKernel=NULL;
661   outputReady=MagickFalse;
662 
663   assert(image != (Image *) NULL);
664   assert(image->signature == MagickCoreSignature);
665   if (image->debug != MagickFalse)
666     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
667 
668   device=RequestOpenCLDevice(clEnv);
669   queue=AcquireOpenCLCommandQueue(device);
670   filteredImage=cloneImage(image,exception);
671   if (filteredImage == (Image *) NULL)
672     goto cleanup;
673   if (filteredImage->number_channels != image->number_channels)
674     goto cleanup;
675   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
676   if (imageBuffer == (cl_mem) NULL)
677     goto cleanup;
678   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
679   if (filteredImageBuffer == (cl_mem) NULL)
680     goto cleanup;
681 
682   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
683     exception);
684   if (imageKernelBuffer == (cl_mem) NULL)
685     goto cleanup;
686 
687   length=image->columns*image->rows;
688   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
689     sizeof(cl_float4),(void *) NULL);
690   if (tempImageBuffer == (cl_mem) NULL)
691     goto cleanup;
692 
693   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
694   if (blurRowKernel == (cl_kernel) NULL)
695   {
696     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
697       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
698     goto cleanup;
699   }
700 
701   number_channels=(cl_uint) image->number_channels;
702   imageColumns=(cl_uint) image->columns;
703   imageRows=(cl_uint) image->rows;
704 
705   i=0;
706   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
707   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
708   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
709   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
710   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
711   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
712   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
713   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
714   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
715   if (status != CL_SUCCESS)
716   {
717     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
718       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
719     goto cleanup;
720   }
721 
722   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
723   gsize[1]=image->rows;
724   lsize[0]=chunkSize;
725   lsize[1]=1;
726 
727   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
728     lsize,image,filteredImage,MagickFalse,exception);
729   if (outputReady == MagickFalse)
730     goto cleanup;
731 
732   blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
733   if (blurColumnKernel == (cl_kernel) NULL)
734   {
735     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
736       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
737     goto cleanup;
738   }
739 
740   i=0;
741   status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
742   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
743   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
744   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
745   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
746   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
747   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
748   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
749   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
750   if (status != CL_SUCCESS)
751   {
752     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
753       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
754     goto cleanup;
755   }
756 
757   gsize[0]=image->columns;
758   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
759   lsize[0]=1;
760   lsize[1]=chunkSize;
761 
762   outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
763     lsize,image,filteredImage,MagickFalse,exception);
764 
765 cleanup:
766 
767   if (imageBuffer != (cl_mem) NULL)
768     ReleaseOpenCLMemObject(imageBuffer);
769   if (filteredImageBuffer != (cl_mem) NULL)
770     ReleaseOpenCLMemObject(filteredImageBuffer);
771   if (tempImageBuffer != (cl_mem) NULL)
772     ReleaseOpenCLMemObject(tempImageBuffer);
773   if (imageKernelBuffer != (cl_mem) NULL)
774     ReleaseOpenCLMemObject(imageKernelBuffer);
775   if (blurRowKernel != (cl_kernel) NULL)
776     ReleaseOpenCLKernel(blurRowKernel);
777   if (blurColumnKernel != (cl_kernel) NULL)
778     ReleaseOpenCLKernel(blurColumnKernel);
779   if (queue != (cl_command_queue) NULL)
780     ReleaseOpenCLCommandQueue(device,queue);
781   if (device != (MagickCLDevice) NULL)
782     ReleaseOpenCLDevice(device);
783   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
784     filteredImage=DestroyImage(filteredImage);
785 
786   return(filteredImage);
787 }
788 
AccelerateBlurImage(const Image * image,const double radius,const double sigma,ExceptionInfo * exception)789 MagickPrivate Image* AccelerateBlurImage(const Image *image,
790   const double radius,const double sigma,ExceptionInfo *exception)
791 {
792   Image
793     *filteredImage;
794 
795   MagickCLEnv
796     clEnv;
797 
798   assert(image != NULL);
799   assert(exception != (ExceptionInfo *) NULL);
800   if (image->debug != MagickFalse)
801     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
802 
803   if (checkAccelerateCondition(image) == MagickFalse)
804     return((Image *) NULL);
805 
806   clEnv=getOpenCLEnvironment(exception);
807   if (clEnv == (MagickCLEnv) NULL)
808     return((Image *) NULL);
809 
810   filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
811   return(filteredImage);
812 }
813 
814 /*
815 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
816 %                                                                             %
817 %                                                                             %
818 %                                                                             %
819 %     A c c e l e r a t e C o n t r a s t I m a g e                           %
820 %                                                                             %
821 %                                                                             %
822 %                                                                             %
823 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
824 */
825 
ComputeContrastImage(Image * image,MagickCLEnv clEnv,const MagickBooleanType sharpen,ExceptionInfo * exception)826 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
827   const MagickBooleanType sharpen,ExceptionInfo *exception)
828 {
829   cl_command_queue
830     queue;
831 
832   cl_int
833     status,
834     sign;
835 
836   cl_kernel
837     contrastKernel;
838 
839   cl_mem
840     imageBuffer;
841 
842   cl_uint
843     number_channels;
844 
845   MagickBooleanType
846     outputReady;
847 
848   MagickCLDevice
849     device;
850 
851   size_t
852     gsize[2],
853     i;
854 
855   assert(image != (Image *) NULL);
856   assert(image->signature == MagickCoreSignature);
857   if (image->debug != MagickFalse)
858     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
859 
860   contrastKernel=NULL;
861   imageBuffer=NULL;
862   outputReady=MagickFalse;
863 
864   device=RequestOpenCLDevice(clEnv);
865   queue=AcquireOpenCLCommandQueue(device);
866   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
867   if (imageBuffer == (cl_mem) NULL)
868     goto cleanup;
869 
870   contrastKernel=AcquireOpenCLKernel(device,"Contrast");
871   if (contrastKernel == (cl_kernel) NULL)
872   {
873     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
874       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
875     goto cleanup;
876   }
877 
878   number_channels=(cl_uint) image->number_channels;
879   sign=sharpen != MagickFalse ? 1 : -1;
880 
881   i=0;
882   status =SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
883   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_uint),&number_channels);
884   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_int),&sign);
885   if (status != CL_SUCCESS)
886   {
887     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
888       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
889     goto cleanup;
890   }
891 
892   gsize[0]=image->columns;
893   gsize[1]=image->rows;
894 
895   outputReady=EnqueueOpenCLKernel(queue,contrastKernel,2,(const size_t *) NULL,
896     gsize,(const size_t *) NULL,image,(Image *) NULL,MagickFalse,exception);
897 
898 cleanup:
899 
900   if (imageBuffer != (cl_mem) NULL)
901     ReleaseOpenCLMemObject(imageBuffer);
902   if (contrastKernel != (cl_kernel) NULL)
903     ReleaseOpenCLKernel(contrastKernel);
904   if (queue != (cl_command_queue) NULL)
905     ReleaseOpenCLCommandQueue(device,queue);
906   if (device != (MagickCLDevice) NULL)
907     ReleaseOpenCLDevice(device);
908 
909   return(outputReady);
910 }
911 
AccelerateContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)912 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
913   const MagickBooleanType sharpen,ExceptionInfo *exception)
914 {
915   MagickBooleanType
916     status;
917 
918   MagickCLEnv
919     clEnv;
920 
921   assert(image != NULL);
922   assert(exception != (ExceptionInfo *) NULL);
923   if (image->debug != MagickFalse)
924     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
925 
926   if (checkAccelerateCondition(image) == MagickFalse)
927     return(MagickFalse);
928 
929   clEnv=getOpenCLEnvironment(exception);
930   if (clEnv == (MagickCLEnv) NULL)
931     return(MagickFalse);
932 
933   status=ComputeContrastImage(image,clEnv,sharpen,exception);
934   return(status);
935 }
936 
937 /*
938 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
939 %                                                                             %
940 %                                                                             %
941 %                                                                             %
942 %     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
943 %                                                                             %
944 %                                                                             %
945 %                                                                             %
946 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
947 */
948 
ComputeContrastStretchImage(Image * image,MagickCLEnv clEnv,const double black_point,const double white_point,ExceptionInfo * exception)949 static MagickBooleanType ComputeContrastStretchImage(Image *image,
950   MagickCLEnv clEnv,const double black_point,const double white_point,
951   ExceptionInfo *exception)
952 {
953 #define ContrastStretchImageTag  "ContrastStretch/Image"
954 #define MaxRange(color)  ((cl_float) ScaleQuantumToMap((Quantum) (color)))
955 
956   CacheView
957     *image_view;
958 
959   cl_command_queue
960     queue;
961 
962   cl_int
963     clStatus;
964 
965   cl_mem_flags
966     mem_flags;
967 
968   cl_mem
969     histogramBuffer,
970     imageBuffer,
971     stretchMapBuffer;
972 
973   cl_kernel
974     histogramKernel,
975     stretchKernel;
976 
977   cl_event
978     event;
979 
980   cl_uint4
981     *histogram;
982 
983   double
984     intensity;
985 
986   cl_float4
987     black,
988     white;
989 
990   MagickBooleanType
991     outputReady,
992     status;
993 
994   MagickCLDevice
995     device;
996 
997   MagickSizeType
998     length;
999 
1000   PixelPacket
1001     *stretch_map;
1002 
1003   ssize_t
1004     i;
1005 
1006   size_t
1007     global_work_size[2];
1008 
1009   void
1010     *hostPtr,
1011     *inputPixels;
1012 
1013   assert(image != (Image *) NULL);
1014   assert(image->signature == MagickCoreSignature);
1015   if (image->debug != MagickFalse)
1016     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1017 
1018   histogram=NULL;
1019   stretch_map=NULL;
1020   inputPixels = NULL;
1021   imageBuffer = NULL;
1022   histogramBuffer = NULL;
1023   stretchMapBuffer = NULL;
1024   histogramKernel = NULL;
1025   stretchKernel = NULL;
1026   queue = NULL;
1027   outputReady = MagickFalse;
1028 
1029   /* exception=(&image->exception); */
1030 
1031   /*
1032     Initialize opencl environment.
1033   */
1034   device = RequestOpenCLDevice(clEnv);
1035   queue = AcquireOpenCLCommandQueue(device);
1036 
1037   /*
1038     Allocate and initialize histogram arrays.
1039   */
1040   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1041 
1042   if (histogram == (cl_uint4 *) NULL)
1043     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1044 
1045   /* reset histogram */
1046   (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
1047 
1048   /*
1049   if (IsGrayImage(image,exception) != MagickFalse)
1050     (void) SetImageColorspace(image,GRAYColorspace);
1051   */
1052 
1053   status=MagickTrue;
1054 
1055 
1056   /*
1057     Form histogram.
1058   */
1059   /* Create and initialize OpenCL buffers. */
1060   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1061   /* assume this  will get a writable image */
1062   image_view=AcquireAuthenticCacheView(image,exception);
1063   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1064 
1065   if (inputPixels == (void *) NULL)
1066   {
1067     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1068     goto cleanup;
1069   }
1070   /* If the host pointer is aligned to the size of CLPixelPacket,
1071      then use the host buffer directly from the GPU; otherwise,
1072      create a buffer on the GPU and copy the data over */
1073   if (ALIGNED(inputPixels,CLPixelPacket))
1074   {
1075     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1076   }
1077   else
1078   {
1079     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1080   }
1081   /* create a CL buffer from image pixel buffer */
1082   length = image->columns * image->rows;
1083   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1084   if (clStatus != CL_SUCCESS)
1085   {
1086     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1087     goto cleanup;
1088   }
1089 
1090   /* If the host pointer is aligned to the size of cl_uint,
1091      then use the host buffer directly from the GPU; otherwise,
1092      create a buffer on the GPU and copy the data over */
1093   if (ALIGNED(histogram,cl_uint4))
1094   {
1095     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1096     hostPtr = histogram;
1097   }
1098   else
1099   {
1100     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1101     hostPtr = histogram;
1102   }
1103   /* create a CL buffer for histogram  */
1104   length = (MaxMap+1);
1105   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1106   if (clStatus != CL_SUCCESS)
1107   {
1108     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1109     goto cleanup;
1110   }
1111 
1112   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1113   if (status == MagickFalse)
1114     goto cleanup;
1115 
1116   /* read from the kenel output */
1117   if (ALIGNED(histogram,cl_uint4))
1118   {
1119     length = (MaxMap+1);
1120     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1121   }
1122   else
1123   {
1124     length = (MaxMap+1);
1125     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1126   }
1127   if (clStatus != CL_SUCCESS)
1128   {
1129     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1130     goto cleanup;
1131   }
1132 
1133   /* unmap, don't block gpu to use this buffer again.  */
1134   if (ALIGNED(histogram,cl_uint4))
1135   {
1136     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1137     if (clStatus != CL_SUCCESS)
1138     {
1139       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1140       goto cleanup;
1141     }
1142   }
1143 
1144   /* recreate input buffer later, in case image updated */
1145 #ifdef RECREATEBUFFER
1146   if (imageBuffer!=NULL)
1147     clEnv->library->clReleaseMemObject(imageBuffer);
1148 #endif
1149 
1150   /* CPU stuff */
1151   /*
1152      Find the histogram boundaries by locating the black/white levels.
1153   */
1154   black.x=0.0;
1155   white.x=MaxRange(QuantumRange);
1156   if ((image->channel_mask & RedChannel) != 0)
1157   {
1158     intensity=0.0;
1159     for (i=0; i <= (ssize_t) MaxMap; i++)
1160     {
1161       intensity+=histogram[i].s[2];
1162       if (intensity > black_point)
1163         break;
1164     }
1165     black.x=(cl_float) i;
1166     intensity=0.0;
1167     for (i=(ssize_t) MaxMap; i != 0; i--)
1168     {
1169       intensity+=histogram[i].s[2];
1170       if (intensity > ((double) image->columns*image->rows-white_point))
1171         break;
1172     }
1173     white.x=(cl_float) i;
1174   }
1175   black.y=0.0;
1176   white.y=MaxRange(QuantumRange);
1177   if ((image->channel_mask & GreenChannel) != 0)
1178   {
1179     intensity=0.0;
1180     for (i=0; i <= (ssize_t) MaxMap; i++)
1181     {
1182       intensity+=histogram[i].s[2];
1183       if (intensity > black_point)
1184         break;
1185     }
1186     black.y=(cl_float) i;
1187     intensity=0.0;
1188     for (i=(ssize_t) MaxMap; i != 0; i--)
1189     {
1190       intensity+=histogram[i].s[2];
1191       if (intensity > ((double) image->columns*image->rows-white_point))
1192         break;
1193     }
1194     white.y=(cl_float) i;
1195   }
1196   black.z=0.0;
1197   white.z=MaxRange(QuantumRange);
1198   if ((image->channel_mask & BlueChannel) != 0)
1199   {
1200     intensity=0.0;
1201     for (i=0; i <= (ssize_t) MaxMap; i++)
1202     {
1203       intensity+=histogram[i].s[2];
1204       if (intensity > black_point)
1205         break;
1206     }
1207     black.z=(cl_float) i;
1208     intensity=0.0;
1209     for (i=(ssize_t) MaxMap; i != 0; i--)
1210     {
1211       intensity+=histogram[i].s[2];
1212       if (intensity > ((double) image->columns*image->rows-white_point))
1213         break;
1214     }
1215     white.z=(cl_float) i;
1216   }
1217   black.w=0.0;
1218   white.w=MaxRange(QuantumRange);
1219   if ((image->channel_mask & AlphaChannel) != 0)
1220   {
1221     intensity=0.0;
1222     for (i=0; i <= (ssize_t) MaxMap; i++)
1223     {
1224       intensity+=histogram[i].s[2];
1225       if (intensity > black_point)
1226         break;
1227     }
1228     black.w=(cl_float) i;
1229     intensity=0.0;
1230     for (i=(ssize_t) MaxMap; i != 0; i--)
1231     {
1232       intensity+=histogram[i].s[2];
1233       if (intensity > ((double) image->columns*image->rows-white_point))
1234         break;
1235     }
1236     white.w=(cl_float) i;
1237   }
1238 
1239   stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1240     sizeof(*stretch_map));
1241 
1242   if (stretch_map == (PixelPacket *) NULL)
1243     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1244       image->filename);
1245 
1246   /*
1247     Stretch the histogram to create the stretched image mapping.
1248   */
1249   (void) memset(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1250   for (i=0; i <= (ssize_t) MaxMap; i++)
1251   {
1252     if ((image->channel_mask & RedChannel) != 0)
1253     {
1254       if (i < (ssize_t) black.x)
1255         stretch_map[i].red=(Quantum) 0;
1256       else
1257         if (i > (ssize_t) white.x)
1258           stretch_map[i].red=QuantumRange;
1259         else
1260           if (black.x != white.x)
1261             stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1262                   (i-black.x)/(white.x-black.x)));
1263     }
1264     if ((image->channel_mask & GreenChannel) != 0)
1265     {
1266       if (i < (ssize_t) black.y)
1267         stretch_map[i].green=0;
1268       else
1269         if (i > (ssize_t) white.y)
1270           stretch_map[i].green=QuantumRange;
1271         else
1272           if (black.y != white.y)
1273             stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1274                   (i-black.y)/(white.y-black.y)));
1275     }
1276     if ((image->channel_mask & BlueChannel) != 0)
1277     {
1278       if (i < (ssize_t) black.z)
1279         stretch_map[i].blue=0;
1280       else
1281         if (i > (ssize_t) white.z)
1282           stretch_map[i].blue= QuantumRange;
1283         else
1284           if (black.z != white.z)
1285             stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1286                   (i-black.z)/(white.z-black.z)));
1287     }
1288     if ((image->channel_mask & AlphaChannel) != 0)
1289     {
1290       if (i < (ssize_t) black.w)
1291         stretch_map[i].alpha=0;
1292       else
1293         if (i > (ssize_t) white.w)
1294           stretch_map[i].alpha=QuantumRange;
1295         else
1296           if (black.w != white.w)
1297             stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1298                   (i-black.w)/(white.w-black.w)));
1299     }
1300   }
1301 
1302   /*
1303     Stretch the image.
1304   */
1305   if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1306       (image->colorspace == CMYKColorspace)))
1307     image->storage_class=DirectClass;
1308   if (image->storage_class == PseudoClass)
1309   {
1310     /*
1311        Stretch colormap.
1312        */
1313     for (i=0; i < (ssize_t) image->colors; i++)
1314     {
1315       if ((image->channel_mask & RedChannel) != 0)
1316       {
1317         if (black.x != white.x)
1318           image->colormap[i].red=stretch_map[
1319             ScaleQuantumToMap(image->colormap[i].red)].red;
1320       }
1321       if ((image->channel_mask & GreenChannel) != 0)
1322       {
1323         if (black.y != white.y)
1324           image->colormap[i].green=stretch_map[
1325             ScaleQuantumToMap(image->colormap[i].green)].green;
1326       }
1327       if ((image->channel_mask & BlueChannel) != 0)
1328       {
1329         if (black.z != white.z)
1330           image->colormap[i].blue=stretch_map[
1331             ScaleQuantumToMap(image->colormap[i].blue)].blue;
1332       }
1333       if ((image->channel_mask & AlphaChannel) != 0)
1334       {
1335         if (black.w != white.w)
1336           image->colormap[i].alpha=stretch_map[
1337             ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1338       }
1339     }
1340   }
1341 
1342   /*
1343     Stretch image.
1344   */
1345 
1346 
1347   /* GPU can work on this again, image and equalize map as input
1348     image:        uchar4 (CLPixelPacket)
1349     stretch_map:  uchar4 (PixelPacket)
1350     black, white: float4 (FloatPixelPacket) */
1351 
1352 #ifdef RECREATEBUFFER
1353   /* If the host pointer is aligned to the size of CLPixelPacket,
1354      then use the host buffer directly from the GPU; otherwise,
1355      create a buffer on the GPU and copy the data over */
1356   if (ALIGNED(inputPixels,CLPixelPacket))
1357   {
1358     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1359   }
1360   else
1361   {
1362     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1363   }
1364   /* create a CL buffer from image pixel buffer */
1365   length = image->columns * image->rows;
1366   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1367   if (clStatus != CL_SUCCESS)
1368   {
1369     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1370     goto cleanup;
1371   }
1372 #endif
1373 
1374   /* Create and initialize OpenCL buffers. */
1375   if (ALIGNED(stretch_map, PixelPacket))
1376   {
1377     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1378     hostPtr = stretch_map;
1379   }
1380   else
1381   {
1382     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1383     hostPtr = stretch_map;
1384   }
1385   /* create a CL buffer for stretch_map  */
1386   length = (MaxMap+1);
1387   stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1388   if (clStatus != CL_SUCCESS)
1389   {
1390     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1391     goto cleanup;
1392   }
1393 
1394   /* get the OpenCL kernel */
1395   stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1396   if (stretchKernel == NULL)
1397   {
1398     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1399     goto cleanup;
1400   }
1401 
1402   /* set the kernel arguments */
1403   i = 0;
1404   clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1405   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1406   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1407   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&white);
1408   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&black);
1409   if (clStatus != CL_SUCCESS)
1410   {
1411     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1412     goto cleanup;
1413   }
1414 
1415   /* launch the kernel */
1416   global_work_size[0] = image->columns;
1417   global_work_size[1] = image->rows;
1418 
1419   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1420 
1421   if (clStatus != CL_SUCCESS)
1422   {
1423     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1424     goto cleanup;
1425   }
1426   RecordProfileData(device,stretchKernel,event);
1427 
1428   /* read the data back */
1429   if (ALIGNED(inputPixels,CLPixelPacket))
1430   {
1431     length = image->columns * image->rows;
1432     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1433   }
1434   else
1435   {
1436     length = image->columns * image->rows;
1437     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1438   }
1439   if (clStatus != CL_SUCCESS)
1440   {
1441     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1442     goto cleanup;
1443   }
1444 
1445   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1446 
1447 cleanup:
1448 
1449   image_view=DestroyCacheView(image_view);
1450 
1451   if (imageBuffer!=NULL)
1452     clEnv->library->clReleaseMemObject(imageBuffer);
1453 
1454   if (stretchMapBuffer!=NULL)
1455     clEnv->library->clReleaseMemObject(stretchMapBuffer);
1456   if (stretch_map!=NULL)
1457     stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1458   if (histogramBuffer!=NULL)
1459     clEnv->library->clReleaseMemObject(histogramBuffer);
1460   if (histogram!=NULL)
1461     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1462   if (histogramKernel!=NULL)
1463     ReleaseOpenCLKernel(histogramKernel);
1464   if (stretchKernel!=NULL)
1465     ReleaseOpenCLKernel(stretchKernel);
1466   if (queue != NULL)
1467     ReleaseOpenCLCommandQueue(device,queue);
1468   if (device != NULL)
1469     ReleaseOpenCLDevice(device);
1470 
1471   return(outputReady);
1472 }
1473 
AccelerateContrastStretchImage(Image * image,const double black_point,const double white_point,ExceptionInfo * exception)1474 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1475   Image *image,const double black_point,const double white_point,
1476   ExceptionInfo *exception)
1477 {
1478   MagickBooleanType
1479     status;
1480 
1481   MagickCLEnv
1482     clEnv;
1483 
1484   assert(image != NULL);
1485   assert(exception != (ExceptionInfo *) NULL);
1486   if (image->debug != MagickFalse)
1487     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1488 
1489   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1490       (checkHistogramCondition(image,image->intensity) == MagickFalse))
1491     return(MagickFalse);
1492 
1493   clEnv=getOpenCLEnvironment(exception);
1494   if (clEnv == (MagickCLEnv) NULL)
1495     return(MagickFalse);
1496 
1497   status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1498     exception);
1499   return(status);
1500 }
1501 
1502 /*
1503 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1504 %                                                                             %
1505 %                                                                             %
1506 %                                                                             %
1507 %     A c c e l e r a t e C o n v o l v e I m a g e                           %
1508 %                                                                             %
1509 %                                                                             %
1510 %                                                                             %
1511 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1512 */
1513 
ComputeConvolveImage(const Image * image,MagickCLEnv clEnv,const KernelInfo * kernel,ExceptionInfo * exception)1514 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1515   const KernelInfo *kernel,ExceptionInfo *exception)
1516 {
1517   CacheView
1518     *filteredImage_view,
1519     *image_view;
1520 
1521   cl_command_queue
1522     queue;
1523 
1524   cl_event
1525     event;
1526 
1527   cl_kernel
1528     clkernel;
1529 
1530   cl_int
1531     clStatus;
1532 
1533   cl_mem
1534     convolutionKernel,
1535     filteredImageBuffer,
1536     imageBuffer;
1537 
1538   cl_mem_flags
1539     mem_flags;
1540 
1541   const void
1542     *inputPixels;
1543 
1544   float
1545     *kernelBufferPtr;
1546 
1547   Image
1548     *filteredImage;
1549 
1550   MagickBooleanType
1551     outputReady;
1552 
1553   MagickCLDevice
1554     device;
1555 
1556   MagickSizeType
1557     length;
1558 
1559   size_t
1560     global_work_size[3],
1561     localGroupSize[3],
1562     localMemoryRequirement;
1563 
1564   unsigned
1565     kernelSize;
1566 
1567   unsigned int
1568     filterHeight,
1569     filterWidth,
1570     i,
1571     imageHeight,
1572     imageWidth,
1573     matte;
1574 
1575   void
1576     *filteredPixels,
1577     *hostPtr;
1578 
1579   /* intialize all CL objects to NULL */
1580   imageBuffer = NULL;
1581   filteredImageBuffer = NULL;
1582   convolutionKernel = NULL;
1583   clkernel = NULL;
1584   queue = NULL;
1585 
1586   filteredImage = NULL;
1587   filteredImage_view = NULL;
1588   outputReady = MagickFalse;
1589 
1590   device = RequestOpenCLDevice(clEnv);
1591 
1592   image_view=AcquireAuthenticCacheView(image,exception);
1593   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1594   if (inputPixels == (const void *) NULL)
1595   {
1596     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1597     goto cleanup;
1598   }
1599 
1600   /* Create and initialize OpenCL buffers. */
1601 
1602   /* If the host pointer is aligned to the size of CLPixelPacket,
1603      then use the host buffer directly from the GPU; otherwise,
1604      create a buffer on the GPU and copy the data over */
1605   if (ALIGNED(inputPixels,CLPixelPacket))
1606   {
1607     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1608   }
1609   else
1610   {
1611     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1612   }
1613   /* create a CL buffer from image pixel buffer */
1614   length = image->columns * image->rows;
1615   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1616   if (clStatus != CL_SUCCESS)
1617   {
1618     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1619     goto cleanup;
1620   }
1621 
1622   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1623   assert(filteredImage != NULL);
1624   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1625   {
1626     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1627     goto cleanup;
1628   }
1629   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1630   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1631   if (filteredPixels == (void *) NULL)
1632   {
1633     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1634     goto cleanup;
1635   }
1636 
1637   if (ALIGNED(filteredPixels,CLPixelPacket))
1638   {
1639     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1640     hostPtr = filteredPixels;
1641   }
1642   else
1643   {
1644     mem_flags = CL_MEM_WRITE_ONLY;
1645     hostPtr = NULL;
1646   }
1647   /* create a CL buffer from image pixel buffer */
1648   length = image->columns * image->rows;
1649   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1650   if (clStatus != CL_SUCCESS)
1651   {
1652     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1653     goto cleanup;
1654   }
1655 
1656   kernelSize = (unsigned int) (kernel->width * kernel->height);
1657   convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1658   if (clStatus != CL_SUCCESS)
1659   {
1660     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1661     goto cleanup;
1662   }
1663 
1664   queue = AcquireOpenCLCommandQueue(device);
1665 
1666   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1667           , 0, NULL, NULL, &clStatus);
1668   if (clStatus != CL_SUCCESS)
1669   {
1670     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1671     goto cleanup;
1672   }
1673   for (i = 0; i < kernelSize; i++)
1674   {
1675     kernelBufferPtr[i] = (float) kernel->values[i];
1676   }
1677   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1678   if (clStatus != CL_SUCCESS)
1679   {
1680     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1681     goto cleanup;
1682   }
1683 
1684   /* Compute the local memory requirement for a 16x16 workgroup.
1685      If it's larger than 16k, reduce the workgroup size to 8x8 */
1686   localGroupSize[0] = 16;
1687   localGroupSize[1] = 16;
1688   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1689     + kernel->width*kernel->height*sizeof(float);
1690 
1691   if (localMemoryRequirement > device->local_memory_size)
1692   {
1693     localGroupSize[0] = 8;
1694     localGroupSize[1] = 8;
1695     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1696       + kernel->width*kernel->height*sizeof(float);
1697   }
1698   if (localMemoryRequirement <= device->local_memory_size)
1699   {
1700     /* get the OpenCL kernel */
1701     clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1702     if (clkernel == NULL)
1703     {
1704       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1705       goto cleanup;
1706     }
1707 
1708     /* set the kernel arguments */
1709     i = 0;
1710     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1711     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1712     imageWidth = (unsigned int) image->columns;
1713     imageHeight = (unsigned int) image->rows;
1714     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1715     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1716     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1717     filterWidth = (unsigned int) kernel->width;
1718     filterHeight = (unsigned int) kernel->height;
1719     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1720     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1721     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1722     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1723     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1724     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1725     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1726     if (clStatus != CL_SUCCESS)
1727     {
1728       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1729       goto cleanup;
1730     }
1731 
1732     /* pad the global size to a multiple of the local work size dimension */
1733     global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1734     global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1735 
1736     /* launch the kernel */
1737     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1738     if (clStatus != CL_SUCCESS)
1739     {
1740       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1741       goto cleanup;
1742     }
1743     RecordProfileData(device,clkernel,event);
1744   }
1745   else
1746   {
1747     /* get the OpenCL kernel */
1748     clkernel = AcquireOpenCLKernel(device,"Convolve");
1749     if (clkernel == NULL)
1750     {
1751       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1752       goto cleanup;
1753     }
1754 
1755     /* set the kernel arguments */
1756     i = 0;
1757     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1758     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1759     imageWidth = (unsigned int) image->columns;
1760     imageHeight = (unsigned int) image->rows;
1761     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1762     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1763     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1764     filterWidth = (unsigned int) kernel->width;
1765     filterHeight = (unsigned int) kernel->height;
1766     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1767     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1768     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1769     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1770     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1771     if (clStatus != CL_SUCCESS)
1772     {
1773       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1774       goto cleanup;
1775     }
1776 
1777     localGroupSize[0] = 8;
1778     localGroupSize[1] = 8;
1779     global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1780     global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1781 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1782 
1783     if (clStatus != CL_SUCCESS)
1784     {
1785       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1786       goto cleanup;
1787     }
1788   }
1789   RecordProfileData(device,clkernel,event);
1790 
1791   if (ALIGNED(filteredPixels,CLPixelPacket))
1792   {
1793     length = image->columns * image->rows;
1794     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1795   }
1796   else
1797   {
1798     length = image->columns * image->rows;
1799     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1800   }
1801   if (clStatus != CL_SUCCESS)
1802   {
1803     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1804     goto cleanup;
1805   }
1806 
1807   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1808 
1809 cleanup:
1810 
1811   image_view=DestroyCacheView(image_view);
1812   if (filteredImage_view != NULL)
1813     filteredImage_view=DestroyCacheView(filteredImage_view);
1814   if (imageBuffer != NULL)
1815     clEnv->library->clReleaseMemObject(imageBuffer);
1816   if (filteredImageBuffer != NULL)
1817     clEnv->library->clReleaseMemObject(filteredImageBuffer);
1818   if (convolutionKernel != NULL)
1819     clEnv->library->clReleaseMemObject(convolutionKernel);
1820   if (clkernel != NULL)
1821     ReleaseOpenCLKernel(clkernel);
1822   if (queue != NULL)
1823     ReleaseOpenCLCommandQueue(device,queue);
1824   if (device != NULL)
1825     ReleaseOpenCLDevice(device);
1826   if (outputReady == MagickFalse)
1827   {
1828     if (filteredImage != NULL)
1829     {
1830       DestroyImage(filteredImage);
1831       filteredImage = NULL;
1832     }
1833   }
1834 
1835   return(filteredImage);
1836 }
1837 
AccelerateConvolveImage(const Image * image,const KernelInfo * kernel,ExceptionInfo * exception)1838 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1839   const KernelInfo *kernel,ExceptionInfo *exception)
1840 {
1841   /* Temporary disabled due to access violation
1842 
1843   Image
1844     *filteredImage;
1845 
1846   assert(image != NULL);
1847   assert(kernel != (KernelInfo *) NULL);
1848   assert(exception != (ExceptionInfo *) NULL);
1849   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1850       (checkOpenCLEnvironment(exception) == MagickFalse))
1851     return((Image *) NULL);
1852 
1853   filteredImage=ComputeConvolveImage(image,kernel,exception);
1854   return(filteredImage);
1855   */
1856   magick_unreferenced(image);
1857   magick_unreferenced(kernel);
1858   magick_unreferenced(exception);
1859   return((Image *)NULL);
1860 }
1861 
1862 /*
1863 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1864 %                                                                             %
1865 %                                                                             %
1866 %                                                                             %
1867 %     A c c e l e r a t e D e s p e c k l e I m a g e                         %
1868 %                                                                             %
1869 %                                                                             %
1870 %                                                                             %
1871 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1872 */
1873 
ComputeDespeckleImage(const Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)1874 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1875   ExceptionInfo*exception)
1876 {
1877   static const int
1878     X[4] = {0, 1, 1,-1},
1879     Y[4] = {1, 0, 1, 1};
1880 
1881   CacheView
1882     *filteredImage_view,
1883     *image_view;
1884 
1885   cl_command_queue
1886     queue;
1887 
1888   cl_int
1889     clStatus;
1890 
1891   cl_kernel
1892     hullPass1,
1893     hullPass2;
1894 
1895   cl_event
1896     event;
1897 
1898   cl_mem_flags
1899     mem_flags;
1900 
1901   cl_mem
1902     filteredImageBuffer,
1903     imageBuffer,
1904     tempImageBuffer[2];
1905 
1906   const void
1907     *inputPixels;
1908 
1909   Image
1910     *filteredImage;
1911 
1912   int
1913     k,
1914     matte;
1915 
1916   MagickBooleanType
1917     outputReady;
1918 
1919   MagickCLDevice
1920     device;
1921 
1922   MagickSizeType
1923     length;
1924 
1925   size_t
1926     global_work_size[2];
1927 
1928   unsigned int
1929     imageHeight,
1930     imageWidth;
1931 
1932   void
1933     *filteredPixels,
1934     *hostPtr;
1935 
1936   outputReady = MagickFalse;
1937   inputPixels = NULL;
1938   filteredImage = NULL;
1939   filteredImage_view = NULL;
1940   filteredPixels = NULL;
1941   imageBuffer = NULL;
1942   filteredImageBuffer = NULL;
1943   hullPass1 = NULL;
1944   hullPass2 = NULL;
1945   queue = NULL;
1946   tempImageBuffer[0] = tempImageBuffer[1] = NULL;
1947 
1948   device = RequestOpenCLDevice(clEnv);
1949   queue = AcquireOpenCLCommandQueue(device);
1950 
1951   image_view=AcquireAuthenticCacheView(image,exception);
1952   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1953   if (inputPixels == (void *) NULL)
1954   {
1955     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1956     goto cleanup;
1957   }
1958 
1959   if (ALIGNED(inputPixels,CLPixelPacket))
1960   {
1961     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1962   }
1963   else
1964   {
1965     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1966   }
1967   /* create a CL buffer from image pixel buffer */
1968   length = image->columns * image->rows;
1969   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1970   if (clStatus != CL_SUCCESS)
1971   {
1972     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1973     goto cleanup;
1974   }
1975 
1976   mem_flags = CL_MEM_READ_WRITE;
1977   length = image->columns * image->rows;
1978   for (k = 0; k < 2; k++)
1979   {
1980     tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
1981     if (clStatus != CL_SUCCESS)
1982     {
1983       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1984       goto cleanup;
1985     }
1986   }
1987 
1988   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1989   assert(filteredImage != NULL);
1990   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1991   {
1992     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1993     goto cleanup;
1994   }
1995   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1996   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1997   if (filteredPixels == (void *) NULL)
1998   {
1999     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2000     goto cleanup;
2001   }
2002 
2003   if (ALIGNED(filteredPixels,CLPixelPacket))
2004   {
2005     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2006     hostPtr = filteredPixels;
2007   }
2008   else
2009   {
2010     mem_flags = CL_MEM_WRITE_ONLY;
2011     hostPtr = NULL;
2012   }
2013   /* create a CL buffer from image pixel buffer */
2014   length = image->columns * image->rows;
2015   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2016   if (clStatus != CL_SUCCESS)
2017   {
2018     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2019     goto cleanup;
2020   }
2021 
2022   hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2023   hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2024 
2025   clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2026   clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2027   imageWidth = (unsigned int) image->columns;
2028   clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2029   imageHeight = (unsigned int) image->rows;
2030   clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2031   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2032   clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2033   if (clStatus != CL_SUCCESS)
2034   {
2035     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2036     goto cleanup;
2037   }
2038 
2039   clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2040   clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2041   imageWidth = (unsigned int) image->columns;
2042   clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2043   imageHeight = (unsigned int) image->rows;
2044   clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2045   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2046   clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2047   if (clStatus != CL_SUCCESS)
2048   {
2049     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2050     goto cleanup;
2051   }
2052 
2053 
2054   global_work_size[0] = image->columns;
2055   global_work_size[1] = image->rows;
2056 
2057 
2058   for (k = 0; k < 4; k++)
2059   {
2060     cl_int2 offset;
2061     int polarity;
2062 
2063 
2064     offset.s[0] = X[k];
2065     offset.s[1] = Y[k];
2066     polarity = 1;
2067     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2068     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2069     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2070     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2071     if (clStatus != CL_SUCCESS)
2072     {
2073       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2074       goto cleanup;
2075     }
2076     /* launch the kernel */
2077 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2078     if (clStatus != CL_SUCCESS)
2079     {
2080       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2081       goto cleanup;
2082     }
2083     RecordProfileData(device,hullPass1,event);
2084 
2085     /* launch the kernel */
2086 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2087     if (clStatus != CL_SUCCESS)
2088     {
2089       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2090       goto cleanup;
2091     }
2092     RecordProfileData(device,hullPass2,event);
2093 
2094     if (k == 0)
2095       clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2096     offset.s[0] = -X[k];
2097     offset.s[1] = -Y[k];
2098     polarity = 1;
2099     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2100     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2101     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2102     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2103     if (clStatus != CL_SUCCESS)
2104     {
2105       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2106       goto cleanup;
2107     }
2108     /* launch the kernel */
2109 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2110     if (clStatus != CL_SUCCESS)
2111     {
2112       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2113       goto cleanup;
2114     }
2115     RecordProfileData(device,hullPass1,event);
2116 
2117     /* launch the kernel */
2118 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2119     if (clStatus != CL_SUCCESS)
2120     {
2121       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2122       goto cleanup;
2123     }
2124     RecordProfileData(device,hullPass2,event);
2125 
2126     offset.s[0] = -X[k];
2127     offset.s[1] = -Y[k];
2128     polarity = -1;
2129     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2130     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2131     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2132     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2133     if (clStatus != CL_SUCCESS)
2134     {
2135       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2136       goto cleanup;
2137     }
2138     /* launch the kernel */
2139 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2140     if (clStatus != CL_SUCCESS)
2141     {
2142       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2143       goto cleanup;
2144     }
2145     RecordProfileData(device,hullPass1,event);
2146 
2147     /* launch the kernel */
2148 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2149     if (clStatus != CL_SUCCESS)
2150     {
2151       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2152       goto cleanup;
2153     }
2154     RecordProfileData(device,hullPass2,event);
2155 
2156     offset.s[0] = X[k];
2157     offset.s[1] = Y[k];
2158     polarity = -1;
2159     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2160     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2161     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2162     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2163 
2164     if (k == 3)
2165       clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2166 
2167     if (clStatus != CL_SUCCESS)
2168     {
2169       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2170       goto cleanup;
2171     }
2172     /* launch the kernel */
2173 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2174     if (clStatus != CL_SUCCESS)
2175     {
2176       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2177       goto cleanup;
2178     }
2179     RecordProfileData(device,hullPass1,event);
2180 
2181     /* launch the kernel */
2182 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2183     if (clStatus != CL_SUCCESS)
2184     {
2185       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2186       goto cleanup;
2187     }
2188     RecordProfileData(device,hullPass2,event);
2189   }
2190 
2191   if (ALIGNED(filteredPixels,CLPixelPacket))
2192   {
2193     length = image->columns * image->rows;
2194     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2195   }
2196   else
2197   {
2198     length = image->columns * image->rows;
2199     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2200   }
2201   if (clStatus != CL_SUCCESS)
2202   {
2203     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2204     goto cleanup;
2205   }
2206 
2207   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2208 
2209 cleanup:
2210 
2211   image_view=DestroyCacheView(image_view);
2212   if (filteredImage_view != NULL)
2213     filteredImage_view=DestroyCacheView(filteredImage_view);
2214 
2215   if (queue != NULL)
2216     ReleaseOpenCLCommandQueue(device,queue);
2217   if (device != NULL)
2218     ReleaseOpenCLDevice(device);
2219   if (imageBuffer!=NULL)
2220     clEnv->library->clReleaseMemObject(imageBuffer);
2221   for (k = 0; k < 2; k++)
2222   {
2223     if (tempImageBuffer[k]!=NULL)
2224       clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2225   }
2226   if (filteredImageBuffer!=NULL)
2227     clEnv->library->clReleaseMemObject(filteredImageBuffer);
2228   if (hullPass1!=NULL)
2229     ReleaseOpenCLKernel(hullPass1);
2230   if (hullPass2!=NULL)
2231     ReleaseOpenCLKernel(hullPass2);
2232   if (outputReady == MagickFalse && filteredImage != NULL)
2233     filteredImage=DestroyImage(filteredImage);
2234 
2235   return(filteredImage);
2236 }
2237 
AccelerateDespeckleImage(const Image * image,ExceptionInfo * exception)2238 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2239   ExceptionInfo* exception)
2240 {
2241   Image
2242     *filteredImage;
2243 
2244   MagickCLEnv
2245     clEnv;
2246 
2247   assert(image != NULL);
2248   assert(exception != (ExceptionInfo *) NULL);
2249 
2250   if (checkAccelerateConditionRGBA(image) == MagickFalse)
2251     return((Image *) NULL);
2252 
2253   clEnv=getOpenCLEnvironment(exception);
2254   if (clEnv == (MagickCLEnv) NULL)
2255     return((Image *) NULL);
2256 
2257   filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2258   return(filteredImage);
2259 }
2260 
2261 /*
2262 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2263 %                                                                             %
2264 %                                                                             %
2265 %                                                                             %
2266 %     A c c e l e r a t e E q u a l i z e I m a g e                           %
2267 %                                                                             %
2268 %                                                                             %
2269 %                                                                             %
2270 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2271 */
2272 
ComputeEqualizeImage(Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)2273 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2274   ExceptionInfo *exception)
2275 {
2276 #define EqualizeImageTag  "Equalize/Image"
2277 
2278   CacheView
2279     *image_view;
2280 
2281   cl_command_queue
2282     queue;
2283 
2284   cl_int
2285     clStatus;
2286 
2287   cl_mem_flags
2288     mem_flags;
2289 
2290   cl_mem
2291     equalizeMapBuffer,
2292     histogramBuffer,
2293     imageBuffer;
2294 
2295   cl_kernel
2296     equalizeKernel,
2297     histogramKernel;
2298 
2299   cl_event
2300     event;
2301 
2302   cl_uint4
2303     *histogram;
2304 
2305   cl_float4
2306     white,
2307     black,
2308     intensity,
2309     *map;
2310 
2311   MagickBooleanType
2312     outputReady,
2313     status;
2314 
2315   MagickCLDevice
2316     device;
2317 
2318   MagickSizeType
2319     length;
2320 
2321   PixelPacket
2322     *equalize_map;
2323 
2324   ssize_t
2325     i;
2326 
2327   size_t
2328     global_work_size[2];
2329 
2330   void
2331     *hostPtr,
2332     *inputPixels;
2333 
2334   assert(image != (Image *) NULL);
2335   assert(image->signature == MagickCoreSignature);
2336   if (image->debug != MagickFalse)
2337     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2338 
2339   map=NULL;
2340   histogram=NULL;
2341   equalize_map=NULL;
2342   inputPixels = NULL;
2343   imageBuffer = NULL;
2344   histogramBuffer = NULL;
2345   equalizeMapBuffer = NULL;
2346   histogramKernel = NULL;
2347   equalizeKernel = NULL;
2348   queue = NULL;
2349   outputReady = MagickFalse;
2350 
2351   /*
2352    * initialize opencl env
2353    */
2354   device = RequestOpenCLDevice(clEnv);
2355   queue = AcquireOpenCLCommandQueue(device);
2356 
2357   /*
2358     Allocate and initialize histogram arrays.
2359   */
2360   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2361   if (histogram == (cl_uint4 *) NULL)
2362       ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2363 
2364   /* reset histogram */
2365   (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
2366 
2367   /* Create and initialize OpenCL buffers. */
2368   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2369   /* assume this  will get a writable image */
2370   image_view=AcquireAuthenticCacheView(image,exception);
2371   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2372 
2373   if (inputPixels == (void *) NULL)
2374   {
2375     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2376     goto cleanup;
2377   }
2378   /* If the host pointer is aligned to the size of CLPixelPacket,
2379      then use the host buffer directly from the GPU; otherwise,
2380      create a buffer on the GPU and copy the data over */
2381   if (ALIGNED(inputPixels,CLPixelPacket))
2382   {
2383     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2384   }
2385   else
2386   {
2387     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2388   }
2389   /* create a CL buffer from image pixel buffer */
2390   length = image->columns * image->rows;
2391   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2392   if (clStatus != CL_SUCCESS)
2393   {
2394     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2395     goto cleanup;
2396   }
2397 
2398   /* If the host pointer is aligned to the size of cl_uint,
2399      then use the host buffer directly from the GPU; otherwise,
2400      create a buffer on the GPU and copy the data over */
2401   if (ALIGNED(histogram,cl_uint4))
2402   {
2403     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2404     hostPtr = histogram;
2405   }
2406   else
2407   {
2408     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2409     hostPtr = histogram;
2410   }
2411   /* create a CL buffer for histogram  */
2412   length = (MaxMap+1);
2413   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2414   if (clStatus != CL_SUCCESS)
2415   {
2416     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2417     goto cleanup;
2418   }
2419 
2420   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2421   if (status == MagickFalse)
2422     goto cleanup;
2423 
2424   /* read from the kenel output */
2425   if (ALIGNED(histogram,cl_uint4))
2426   {
2427     length = (MaxMap+1);
2428     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2429   }
2430   else
2431   {
2432     length = (MaxMap+1);
2433     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2434   }
2435   if (clStatus != CL_SUCCESS)
2436   {
2437     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2438     goto cleanup;
2439   }
2440 
2441   /* unmap, don't block gpu to use this buffer again.  */
2442   if (ALIGNED(histogram,cl_uint4))
2443   {
2444     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2445     if (clStatus != CL_SUCCESS)
2446     {
2447       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2448       goto cleanup;
2449     }
2450   }
2451 
2452   /* recreate input buffer later, in case image updated */
2453 #ifdef RECREATEBUFFER
2454   if (imageBuffer!=NULL)
2455     clEnv->library->clReleaseMemObject(imageBuffer);
2456 #endif
2457 
2458   /* CPU stuff */
2459   equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2460   if (equalize_map == (PixelPacket *) NULL)
2461     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2462 
2463   map=(cl_float4 *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2464   if (map == (cl_float4 *) NULL)
2465     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2466 
2467   /*
2468     Integrate the histogram to get the equalization map.
2469   */
2470   (void) memset(&intensity,0,sizeof(intensity));
2471   for (i=0; i <= (ssize_t) MaxMap; i++)
2472   {
2473     if ((image->channel_mask & SyncChannels) != 0)
2474     {
2475       intensity.x+=histogram[i].s[2];
2476       map[i]=intensity;
2477       continue;
2478     }
2479     if ((image->channel_mask & RedChannel) != 0)
2480       intensity.x+=histogram[i].s[2];
2481     if ((image->channel_mask & GreenChannel) != 0)
2482       intensity.y+=histogram[i].s[1];
2483     if ((image->channel_mask & BlueChannel) != 0)
2484       intensity.z+=histogram[i].s[0];
2485     if ((image->channel_mask & AlphaChannel) != 0)
2486       intensity.w+=histogram[i].s[3];
2487     map[i]=intensity;
2488   }
2489   black=map[0];
2490   white=map[(int) MaxMap];
2491   (void) memset(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2492   for (i=0; i <= (ssize_t) MaxMap; i++)
2493   {
2494     if ((image->channel_mask & SyncChannels) != 0)
2495     {
2496       if (white.x != black.x)
2497         equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2498                 (map[i].x-black.x))/(white.x-black.x)));
2499       continue;
2500     }
2501     if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2502       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2503               (map[i].x-black.x))/(white.x-black.x)));
2504     if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2505       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2506               (map[i].y-black.y))/(white.y-black.y)));
2507     if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2508       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2509               (map[i].z-black.z))/(white.z-black.z)));
2510     if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2511       equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2512               (map[i].w-black.w))/(white.w-black.w)));
2513   }
2514 
2515   if (image->storage_class == PseudoClass)
2516   {
2517     /*
2518        Equalize colormap.
2519        */
2520     for (i=0; i < (ssize_t) image->colors; i++)
2521     {
2522       if ((image->channel_mask & SyncChannels) != 0)
2523       {
2524         if (white.x != black.x)
2525         {
2526           image->colormap[i].red=equalize_map[
2527             ScaleQuantumToMap(image->colormap[i].red)].red;
2528           image->colormap[i].green=equalize_map[
2529             ScaleQuantumToMap(image->colormap[i].green)].red;
2530           image->colormap[i].blue=equalize_map[
2531             ScaleQuantumToMap(image->colormap[i].blue)].red;
2532           image->colormap[i].alpha=equalize_map[
2533             ScaleQuantumToMap(image->colormap[i].alpha)].red;
2534         }
2535         continue;
2536       }
2537       if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2538         image->colormap[i].red=equalize_map[
2539           ScaleQuantumToMap(image->colormap[i].red)].red;
2540       if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2541         image->colormap[i].green=equalize_map[
2542           ScaleQuantumToMap(image->colormap[i].green)].green;
2543       if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2544         image->colormap[i].blue=equalize_map[
2545           ScaleQuantumToMap(image->colormap[i].blue)].blue;
2546       if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2547         image->colormap[i].alpha=equalize_map[
2548           ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2549     }
2550   }
2551 
2552   /*
2553     Equalize image.
2554   */
2555 
2556   /* GPU can work on this again, image and equalize map as input
2557     image:        uchar4 (CLPixelPacket)
2558     equalize_map: uchar4 (PixelPacket)
2559     black, white: float4 (FloatPixelPacket) */
2560 
2561 #ifdef RECREATEBUFFER
2562   /* If the host pointer is aligned to the size of CLPixelPacket,
2563      then use the host buffer directly from the GPU; otherwise,
2564      create a buffer on the GPU and copy the data over */
2565   if (ALIGNED(inputPixels,CLPixelPacket))
2566   {
2567     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2568   }
2569   else
2570   {
2571     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2572   }
2573   /* create a CL buffer from image pixel buffer */
2574   length = image->columns * image->rows;
2575   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2576   if (clStatus != CL_SUCCESS)
2577   {
2578     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2579     goto cleanup;
2580   }
2581 #endif
2582 
2583   /* Create and initialize OpenCL buffers. */
2584   if (ALIGNED(equalize_map, PixelPacket))
2585   {
2586     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2587     hostPtr = equalize_map;
2588   }
2589   else
2590   {
2591     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2592     hostPtr = equalize_map;
2593   }
2594   /* create a CL buffer for eqaulize_map  */
2595   length = (MaxMap+1);
2596   equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2597   if (clStatus != CL_SUCCESS)
2598   {
2599     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2600     goto cleanup;
2601   }
2602 
2603   /* get the OpenCL kernel */
2604   equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2605   if (equalizeKernel == NULL)
2606   {
2607     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2608     goto cleanup;
2609   }
2610 
2611   /* set the kernel arguments */
2612   i = 0;
2613   clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2614   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2615   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2616   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&white);
2617   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&black);
2618   if (clStatus != CL_SUCCESS)
2619   {
2620     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2621     goto cleanup;
2622   }
2623 
2624   /* launch the kernel */
2625   global_work_size[0] = image->columns;
2626   global_work_size[1] = image->rows;
2627 
2628   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2629 
2630   if (clStatus != CL_SUCCESS)
2631   {
2632     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2633     goto cleanup;
2634   }
2635   RecordProfileData(device,equalizeKernel,event);
2636 
2637   /* read the data back */
2638   if (ALIGNED(inputPixels,CLPixelPacket))
2639   {
2640     length = image->columns * image->rows;
2641     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2642   }
2643   else
2644   {
2645     length = image->columns * image->rows;
2646     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2647   }
2648   if (clStatus != CL_SUCCESS)
2649   {
2650     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2651     goto cleanup;
2652   }
2653 
2654   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2655 
2656 cleanup:
2657 
2658   image_view=DestroyCacheView(image_view);
2659 
2660   if (imageBuffer!=NULL)
2661     clEnv->library->clReleaseMemObject(imageBuffer);
2662   if (map!=NULL)
2663     map=(cl_float4 *) RelinquishMagickMemory(map);
2664   if (equalizeMapBuffer!=NULL)
2665     clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2666   if (equalize_map!=NULL)
2667     equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2668   if (histogramBuffer!=NULL)
2669     clEnv->library->clReleaseMemObject(histogramBuffer);
2670   if (histogram!=NULL)
2671     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2672   if (histogramKernel!=NULL)
2673     ReleaseOpenCLKernel(histogramKernel);
2674   if (equalizeKernel!=NULL)
2675     ReleaseOpenCLKernel(equalizeKernel);
2676   if (queue != NULL)
2677     ReleaseOpenCLCommandQueue(device, queue);
2678   if (device != NULL)
2679     ReleaseOpenCLDevice(device);
2680 
2681   return(outputReady);
2682 }
2683 
AccelerateEqualizeImage(Image * image,ExceptionInfo * exception)2684 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2685   ExceptionInfo *exception)
2686 {
2687   MagickBooleanType
2688     status;
2689 
2690   MagickCLEnv
2691     clEnv;
2692 
2693   assert(image != NULL);
2694   assert(exception != (ExceptionInfo *) NULL);
2695   if (image->debug != MagickFalse)
2696     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2697 
2698   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2699       (checkHistogramCondition(image,image->intensity) == MagickFalse))
2700     return(MagickFalse);
2701 
2702   clEnv=getOpenCLEnvironment(exception);
2703   if (clEnv == (MagickCLEnv) NULL)
2704     return(MagickFalse);
2705 
2706   status=ComputeEqualizeImage(image,clEnv,exception);
2707   return(status);
2708 }
2709 
2710 /*
2711 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2712 %                                                                             %
2713 %                                                                             %
2714 %                                                                             %
2715 %     A c c e l e r a t e F u n c t i o n I m a g e                           %
2716 %                                                                             %
2717 %                                                                             %
2718 %                                                                             %
2719 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2720 */
2721 
ComputeFunctionImage(Image * image,MagickCLEnv clEnv,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2722 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2723   const MagickFunction function,const size_t number_parameters,
2724   const double *parameters,ExceptionInfo *exception)
2725 {
2726   cl_command_queue
2727     queue;
2728 
2729   cl_int
2730     status;
2731 
2732   cl_kernel
2733     functionKernel;
2734 
2735   cl_mem
2736     imageBuffer,
2737     parametersBuffer;
2738 
2739   cl_uint
2740     number_params,
2741     number_channels;
2742 
2743   float
2744     *parametersBufferPtr;
2745 
2746   MagickBooleanType
2747     outputReady;
2748 
2749   MagickCLDevice
2750     device;
2751 
2752   size_t
2753     gsize[2],
2754     i;
2755 
2756   assert(image != (Image *) NULL);
2757   assert(image->signature == MagickCoreSignature);
2758   if (image->debug != MagickFalse)
2759     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2760 
2761   outputReady=MagickFalse;
2762   imageBuffer=NULL;
2763   functionKernel=NULL;
2764   parametersBuffer=NULL;
2765 
2766   device=RequestOpenCLDevice(clEnv);
2767   queue=AcquireOpenCLCommandQueue(device);
2768   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2769   if (imageBuffer == (cl_mem) NULL)
2770     goto cleanup;
2771 
2772   parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2773     sizeof(float));
2774   if (parametersBufferPtr == (float *) NULL)
2775     goto cleanup;
2776   for (i=0; i<number_parameters; i++)
2777     parametersBufferPtr[i]=(float) parameters[i];
2778   parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2779     CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2780     parametersBufferPtr);
2781   parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2782   if (parametersBuffer == (cl_mem) NULL)
2783   {
2784     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2785       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2786     goto cleanup;
2787   }
2788 
2789   functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2790   if (functionKernel == (cl_kernel) NULL)
2791   {
2792     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2793       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2794     goto cleanup;
2795   }
2796 
2797   number_channels=(cl_uint) image->number_channels;
2798   number_params=(cl_uint) number_parameters;
2799 
2800   i=0;
2801   status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2802   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2803   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2804   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2805   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2806   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
2807   if (status != CL_SUCCESS)
2808   {
2809     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2810       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2811     goto cleanup;
2812   }
2813 
2814   gsize[0]=image->columns;
2815   gsize[1]=image->rows;
2816   outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2817     gsize,(const size_t *) NULL,image,(const Image *) NULL,MagickFalse,
2818     exception);
2819 
2820 cleanup:
2821 
2822   if (imageBuffer != (cl_mem) NULL)
2823     ReleaseOpenCLMemObject(imageBuffer);
2824   if (parametersBuffer != (cl_mem) NULL)
2825     ReleaseOpenCLMemObject(parametersBuffer);
2826   if (functionKernel != (cl_kernel) NULL)
2827     ReleaseOpenCLKernel(functionKernel);
2828   if (queue != (cl_command_queue) NULL)
2829     ReleaseOpenCLCommandQueue(device,queue);
2830   if (device != (MagickCLDevice) NULL)
2831     ReleaseOpenCLDevice(device);
2832   return(outputReady);
2833 }
2834 
AccelerateFunctionImage(Image * image,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2835 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2836   const MagickFunction function,const size_t number_parameters,
2837   const double *parameters,ExceptionInfo *exception)
2838 {
2839   MagickBooleanType
2840     status;
2841 
2842   MagickCLEnv
2843     clEnv;
2844 
2845   assert(image != NULL);
2846   assert(exception != (ExceptionInfo *) NULL);
2847   if (image->debug != MagickFalse)
2848     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2849 
2850   if (checkAccelerateCondition(image) == MagickFalse)
2851     return(MagickFalse);
2852 
2853   clEnv=getOpenCLEnvironment(exception);
2854   if (clEnv == (MagickCLEnv) NULL)
2855     return(MagickFalse);
2856 
2857   status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2858     parameters,exception);
2859   return(status);
2860 }
2861 
2862 /*
2863 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2864 %                                                                             %
2865 %                                                                             %
2866 %                                                                             %
2867 %     A c c e l e r a t e G r a y s c a l e I m a g e                         %
2868 %                                                                             %
2869 %                                                                             %
2870 %                                                                             %
2871 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2872 */
2873 
ComputeGrayscaleImage(Image * image,MagickCLEnv clEnv,const PixelIntensityMethod method,ExceptionInfo * exception)2874 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2875   const PixelIntensityMethod method,ExceptionInfo *exception)
2876 {
2877   cl_command_queue
2878     queue;
2879 
2880   cl_int
2881     status;
2882 
2883   cl_kernel
2884     grayscaleKernel;
2885 
2886   cl_mem
2887     imageBuffer;
2888 
2889   cl_uint
2890     number_channels,
2891     colorspace,
2892     intensityMethod;
2893 
2894   MagickBooleanType
2895     outputReady;
2896 
2897   MagickCLDevice
2898     device;
2899 
2900   size_t
2901     gsize[2],
2902     i;
2903 
2904   assert(image != (Image *) NULL);
2905   assert(image->signature == MagickCoreSignature);
2906   if (image->debug != MagickFalse)
2907     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2908 
2909   outputReady=MagickFalse;
2910   imageBuffer=NULL;
2911   grayscaleKernel=NULL;
2912 
2913   device=RequestOpenCLDevice(clEnv);
2914   queue=AcquireOpenCLCommandQueue(device);
2915   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2916   if (imageBuffer == (cl_mem) NULL)
2917     goto cleanup;
2918 
2919   grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2920   if (grayscaleKernel == (cl_kernel) NULL)
2921   {
2922     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2923       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2924     goto cleanup;
2925   }
2926 
2927   number_channels=(cl_uint) image->number_channels;
2928   intensityMethod=(cl_uint) method;
2929   colorspace=(cl_uint) image->colorspace;
2930 
2931   i=0;
2932   status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2933   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2934   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2935   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2936   if (status != CL_SUCCESS)
2937   {
2938     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2939       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2940     goto cleanup;
2941   }
2942 
2943   gsize[0]=image->columns;
2944   gsize[1]=image->rows;
2945   outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
2946     (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
2947     MagickFalse,exception);
2948 
2949 cleanup:
2950 
2951   if (imageBuffer != (cl_mem) NULL)
2952     ReleaseOpenCLMemObject(imageBuffer);
2953   if (grayscaleKernel != (cl_kernel) NULL)
2954     ReleaseOpenCLKernel(grayscaleKernel);
2955   if (queue != (cl_command_queue) NULL)
2956     ReleaseOpenCLCommandQueue(device,queue);
2957   if (device != (MagickCLDevice) NULL)
2958     ReleaseOpenCLDevice(device);
2959 
2960   return(outputReady);
2961 }
2962 
AccelerateGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)2963 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
2964   const PixelIntensityMethod method,ExceptionInfo *exception)
2965 {
2966   MagickBooleanType
2967     status;
2968 
2969   MagickCLEnv
2970     clEnv;
2971 
2972   assert(image != NULL);
2973   assert(exception != (ExceptionInfo *) NULL);
2974   if (image->debug != MagickFalse)
2975     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2976 
2977   if ((checkAccelerateCondition(image) == MagickFalse) ||
2978       (checkPixelIntensity(image,method) == MagickFalse))
2979     return(MagickFalse);
2980 
2981   if (image->number_channels < 3)
2982     return(MagickFalse);
2983 
2984   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
2985       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
2986       (GetPixelBlueTraits(image) == UndefinedPixelTrait))
2987     return(MagickFalse);
2988 
2989   clEnv=getOpenCLEnvironment(exception);
2990   if (clEnv == (MagickCLEnv) NULL)
2991     return(MagickFalse);
2992 
2993   status=ComputeGrayscaleImage(image,clEnv,method,exception);
2994   return(status);
2995 }
2996 
2997 /*
2998 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2999 %                                                                             %
3000 %                                                                             %
3001 %                                                                             %
3002 %     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
3003 %                                                                             %
3004 %                                                                             %
3005 %                                                                             %
3006 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3007 */
3008 
ComputeLocalContrastImage(const Image * image,MagickCLEnv clEnv,const double radius,const double strength,ExceptionInfo * exception)3009 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
3010   const double radius,const double strength,ExceptionInfo *exception)
3011 {
3012   CacheView
3013     *filteredImage_view,
3014     *image_view;
3015 
3016   cl_command_queue
3017     queue;
3018 
3019   cl_int
3020     clStatus,
3021     iRadius;
3022 
3023   cl_kernel
3024     blurRowKernel,
3025     blurColumnKernel;
3026 
3027   cl_event
3028     event;
3029 
3030   cl_mem
3031     filteredImageBuffer,
3032     imageBuffer,
3033     imageKernelBuffer,
3034     tempImageBuffer;
3035 
3036   cl_mem_flags
3037     mem_flags;
3038 
3039   const void
3040     *inputPixels;
3041 
3042   Image
3043     *filteredImage;
3044 
3045   MagickBooleanType
3046     outputReady;
3047 
3048   MagickCLDevice
3049     device;
3050 
3051   MagickSizeType
3052     length;
3053 
3054   void
3055     *filteredPixels,
3056     *hostPtr;
3057 
3058   unsigned int
3059     i,
3060     imageColumns,
3061     imageRows,
3062     passes;
3063 
3064   filteredImage = NULL;
3065   filteredImage_view = NULL;
3066   imageBuffer = NULL;
3067   filteredImageBuffer = NULL;
3068   tempImageBuffer = NULL;
3069   imageKernelBuffer = NULL;
3070   blurRowKernel = NULL;
3071   blurColumnKernel = NULL;
3072   queue = NULL;
3073   outputReady = MagickFalse;
3074 
3075   device = RequestOpenCLDevice(clEnv);
3076   queue = AcquireOpenCLCommandQueue(device);
3077 
3078   /* Create and initialize OpenCL buffers. */
3079   {
3080     image_view=AcquireAuthenticCacheView(image,exception);
3081     inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3082     if (inputPixels == (const void *) NULL)
3083     {
3084       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3085       goto cleanup;
3086     }
3087 
3088     /* If the host pointer is aligned to the size of CLPixelPacket,
3089      then use the host buffer directly from the GPU; otherwise,
3090      create a buffer on the GPU and copy the data over */
3091     if (ALIGNED(inputPixels,CLPixelPacket))
3092     {
3093       mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3094     }
3095     else
3096     {
3097       mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3098     }
3099     /* create a CL buffer from image pixel buffer */
3100     length = image->columns * image->rows;
3101     imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3102     if (clStatus != CL_SUCCESS)
3103     {
3104       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3105       goto cleanup;
3106     }
3107   }
3108 
3109   /* create output */
3110   {
3111     filteredImage = CloneImage(image,0,0,MagickTrue,exception);
3112     assert(filteredImage != NULL);
3113     if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3114     {
3115       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3116       goto cleanup;
3117     }
3118     filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3119     filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3120     if (filteredPixels == (void *) NULL)
3121     {
3122       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3123       goto cleanup;
3124     }
3125 
3126     if (ALIGNED(filteredPixels,CLPixelPacket))
3127     {
3128       mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3129       hostPtr = filteredPixels;
3130     }
3131     else
3132     {
3133       mem_flags = CL_MEM_WRITE_ONLY;
3134       hostPtr = NULL;
3135     }
3136 
3137     /* create a CL buffer from image pixel buffer */
3138     length = image->columns * image->rows;
3139     filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3140     if (clStatus != CL_SUCCESS)
3141     {
3142       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3143       goto cleanup;
3144     }
3145   }
3146 
3147   {
3148     /* create temp buffer */
3149     {
3150       length = image->columns * image->rows;
3151       tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3152       if (clStatus != CL_SUCCESS)
3153       {
3154         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3155         goto cleanup;
3156       }
3157     }
3158 
3159     /* get the opencl kernel */
3160     {
3161       blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3162       if (blurRowKernel == NULL)
3163       {
3164         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3165         goto cleanup;
3166       };
3167 
3168       blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3169       if (blurColumnKernel == NULL)
3170       {
3171         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3172         goto cleanup;
3173       };
3174     }
3175 
3176     {
3177       imageColumns = (unsigned int) image->columns;
3178       imageRows = (unsigned int) image->rows;
3179       iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); /* Normalized radius, 100% gives blur radius of 20% of the largest dimension */
3180 
3181       passes = (((1.0f * imageRows) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3182       passes = (passes < 1) ? 1: passes;
3183 
3184       /* set the kernel arguments */
3185       i = 0;
3186       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3187       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3188       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3189       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3190       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3191       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3192 
3193       if (clStatus != CL_SUCCESS)
3194       {
3195         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3196         goto cleanup;
3197       }
3198     }
3199 
3200     /* launch the kernel */
3201     {
3202       int x;
3203       for (x = 0; x < passes; ++x) {
3204         size_t gsize[2];
3205         size_t wsize[2];
3206         size_t goffset[2];
3207 
3208         gsize[0] = 256;
3209         gsize[1] = (image->rows + passes - 1) / passes;
3210         wsize[0] = 256;
3211         wsize[1] = 1;
3212         goffset[0] = 0;
3213         goffset[1] = x * gsize[1];
3214 
3215         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3216         if (clStatus != CL_SUCCESS)
3217         {
3218           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3219           goto cleanup;
3220         }
3221         clEnv->library->clFlush(queue);
3222         RecordProfileData(device,blurRowKernel,event);
3223       }
3224     }
3225 
3226     {
3227       cl_float FStrength = strength;
3228       i = 0;
3229       clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3230       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3231       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3232       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3233       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3234       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3235       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3236 
3237       if (clStatus != CL_SUCCESS)
3238       {
3239         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3240         goto cleanup;
3241       }
3242     }
3243 
3244     /* launch the kernel */
3245     {
3246       int x;
3247       for (x = 0; x < passes; ++x) {
3248         size_t gsize[2];
3249         size_t wsize[2];
3250         size_t goffset[2];
3251 
3252         gsize[0] = ((image->columns + 3) / 4) * 4;
3253         gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3254         wsize[0] = 4;
3255         wsize[1] = 64;
3256         goffset[0] = 0;
3257         goffset[1] = x * gsize[1];
3258 
3259         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3260         if (clStatus != CL_SUCCESS)
3261         {
3262           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3263           goto cleanup;
3264         }
3265         clEnv->library->clFlush(queue);
3266         RecordProfileData(device,blurColumnKernel,event);
3267       }
3268     }
3269   }
3270 
3271   /* get result */
3272   if (ALIGNED(filteredPixels,CLPixelPacket))
3273   {
3274     length = image->columns * image->rows;
3275     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3276   }
3277   else
3278   {
3279     length = image->columns * image->rows;
3280     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3281   }
3282   if (clStatus != CL_SUCCESS)
3283   {
3284     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3285     goto cleanup;
3286   }
3287 
3288   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3289 
3290 cleanup:
3291 
3292   image_view=DestroyCacheView(image_view);
3293   if (filteredImage_view != NULL)
3294     filteredImage_view=DestroyCacheView(filteredImage_view);
3295 
3296   if (imageBuffer!=NULL)
3297     clEnv->library->clReleaseMemObject(imageBuffer);
3298   if (filteredImageBuffer!=NULL)
3299     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3300   if (tempImageBuffer!=NULL)
3301     clEnv->library->clReleaseMemObject(tempImageBuffer);
3302   if (imageKernelBuffer!=NULL)
3303     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3304   if (blurRowKernel!=NULL)
3305     ReleaseOpenCLKernel(blurRowKernel);
3306   if (blurColumnKernel!=NULL)
3307     ReleaseOpenCLKernel(blurColumnKernel);
3308   if (queue != NULL)
3309     ReleaseOpenCLCommandQueue(device, queue);
3310   if (device != NULL)
3311     ReleaseOpenCLDevice(device);
3312   if (outputReady == MagickFalse)
3313   {
3314     if (filteredImage != NULL)
3315     {
3316       DestroyImage(filteredImage);
3317       filteredImage = NULL;
3318     }
3319   }
3320 
3321   return(filteredImage);
3322 }
3323 
AccelerateLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)3324 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3325   const double radius,const double strength,ExceptionInfo *exception)
3326 {
3327   Image
3328     *filteredImage;
3329 
3330   MagickCLEnv
3331     clEnv;
3332 
3333   assert(image != NULL);
3334   assert(exception != (ExceptionInfo *) NULL);
3335 
3336   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3337     return((Image *) NULL);
3338 
3339   clEnv=getOpenCLEnvironment(exception);
3340   if (clEnv == (MagickCLEnv) NULL)
3341     return((Image *) NULL);
3342 
3343   filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3344     exception);
3345   return(filteredImage);
3346 }
3347 
3348 /*
3349 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3350 %                                                                             %
3351 %                                                                             %
3352 %                                                                             %
3353 %     A c c e l e r a t e M o d u l a t e I m a g e                           %
3354 %                                                                             %
3355 %                                                                             %
3356 %                                                                             %
3357 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3358 */
3359 
ComputeModulateImage(Image * image,MagickCLEnv clEnv,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3360 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3361   const double percent_brightness,const double percent_hue,
3362   const double percent_saturation,const ColorspaceType colorspace,
3363   ExceptionInfo *exception)
3364 {
3365   CacheView
3366     *image_view;
3367 
3368   cl_float
3369     bright,
3370     hue,
3371     saturation;
3372 
3373   cl_command_queue
3374     queue;
3375 
3376   cl_int
3377     color,
3378     clStatus;
3379 
3380   cl_kernel
3381     modulateKernel;
3382 
3383   cl_event
3384     event;
3385 
3386   cl_mem
3387     imageBuffer;
3388 
3389   cl_mem_flags
3390     mem_flags;
3391 
3392   MagickBooleanType
3393     outputReady;
3394 
3395   MagickCLDevice
3396     device;
3397 
3398   MagickSizeType
3399     length;
3400 
3401   ssize_t
3402     i;
3403 
3404   void
3405     *inputPixels;
3406 
3407   assert(image != (Image *) NULL);
3408   assert(image->signature == MagickCoreSignature);
3409   if (image->debug != MagickFalse)
3410     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3411 
3412   inputPixels = NULL;
3413   imageBuffer = NULL;
3414   modulateKernel = NULL;
3415 
3416   /*
3417    * initialize opencl env
3418    */
3419   device = RequestOpenCLDevice(clEnv);
3420   queue = AcquireOpenCLCommandQueue(device);
3421 
3422   outputReady = MagickFalse;
3423 
3424   /* Create and initialize OpenCL buffers.
3425    inputPixels = AcquirePixelCachePixels(image, &length, exception);
3426    assume this  will get a writable image
3427    */
3428   image_view=AcquireAuthenticCacheView(image,exception);
3429   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3430   if (inputPixels == (void *) NULL)
3431   {
3432     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3433     goto cleanup;
3434   }
3435 
3436   /* If the host pointer is aligned to the size of CLPixelPacket,
3437    then use the host buffer directly from the GPU; otherwise,
3438    create a buffer on the GPU and copy the data over
3439    */
3440   if (ALIGNED(inputPixels,CLPixelPacket))
3441   {
3442     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3443   }
3444   else
3445   {
3446     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3447   }
3448   /* create a CL buffer from image pixel buffer */
3449   length = image->columns * image->rows;
3450   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3451   if (clStatus != CL_SUCCESS)
3452   {
3453     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3454     goto cleanup;
3455   }
3456 
3457   modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3458   if (modulateKernel == NULL)
3459   {
3460     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3461     goto cleanup;
3462   }
3463 
3464   bright=percent_brightness;
3465   hue=percent_hue;
3466   saturation=percent_saturation;
3467   color=colorspace;
3468 
3469   i = 0;
3470   clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3471   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3472   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3473   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3474   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3475   if (clStatus != CL_SUCCESS)
3476   {
3477     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3478     goto cleanup;
3479   }
3480 
3481   {
3482     size_t global_work_size[2];
3483     global_work_size[0] = image->columns;
3484     global_work_size[1] = image->rows;
3485     /* launch the kernel */
3486 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3487     if (clStatus != CL_SUCCESS)
3488     {
3489       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3490       goto cleanup;
3491     }
3492     RecordProfileData(device,modulateKernel,event);
3493   }
3494 
3495   if (ALIGNED(inputPixels,CLPixelPacket))
3496   {
3497     length = image->columns * image->rows;
3498     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3499   }
3500   else
3501   {
3502     length = image->columns * image->rows;
3503     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3504   }
3505   if (clStatus != CL_SUCCESS)
3506   {
3507     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3508     goto cleanup;
3509   }
3510 
3511   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3512 
3513 cleanup:
3514 
3515   image_view=DestroyCacheView(image_view);
3516 
3517   if (imageBuffer!=NULL)
3518     clEnv->library->clReleaseMemObject(imageBuffer);
3519   if (modulateKernel!=NULL)
3520     ReleaseOpenCLKernel(modulateKernel);
3521   if (queue != NULL)
3522     ReleaseOpenCLCommandQueue(device,queue);
3523   if (device != NULL)
3524     ReleaseOpenCLDevice(device);
3525 
3526   return outputReady;
3527 
3528 }
3529 
AccelerateModulateImage(Image * image,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3530 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3531   const double percent_brightness,const double percent_hue,
3532   const double percent_saturation,const ColorspaceType colorspace,
3533   ExceptionInfo *exception)
3534 {
3535   MagickBooleanType
3536     status;
3537 
3538   MagickCLEnv
3539     clEnv;
3540 
3541   assert(image != NULL);
3542   assert(exception != (ExceptionInfo *) NULL);
3543   if (image->debug != MagickFalse)
3544     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3545 
3546   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3547     return(MagickFalse);
3548 
3549   if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3550     return(MagickFalse);
3551 
3552   clEnv=getOpenCLEnvironment(exception);
3553   if (clEnv == (MagickCLEnv) NULL)
3554     return(MagickFalse);
3555 
3556   status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3557     percent_saturation,colorspace,exception);
3558   return(status);
3559 }
3560 
3561 /*
3562 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3563 %                                                                             %
3564 %                                                                             %
3565 %                                                                             %
3566 %     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
3567 %                                                                             %
3568 %                                                                             %
3569 %                                                                             %
3570 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3571 */
3572 
ComputeMotionBlurImage(const Image * image,MagickCLEnv clEnv,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3573 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3574   const double *kernel,const size_t width,const OffsetInfo *offset,
3575   ExceptionInfo *exception)
3576 {
3577   CacheView
3578     *filteredImage_view,
3579     *image_view;
3580 
3581   cl_command_queue
3582     queue;
3583 
3584   cl_float4
3585     biasPixel;
3586 
3587   cl_int
3588     clStatus;
3589 
3590   cl_kernel
3591     motionBlurKernel;
3592 
3593   cl_event
3594     event;
3595 
3596   cl_mem
3597     filteredImageBuffer,
3598     imageBuffer,
3599     imageKernelBuffer,
3600     offsetBuffer;
3601 
3602   cl_mem_flags
3603     mem_flags;
3604 
3605   const void
3606     *inputPixels;
3607 
3608   float
3609     *kernelBufferPtr;
3610 
3611   Image
3612     *filteredImage;
3613 
3614   int
3615     *offsetBufferPtr;
3616 
3617   MagickBooleanType
3618     outputReady;
3619 
3620   MagickCLDevice
3621     device;
3622 
3623   PixelInfo
3624     bias;
3625 
3626   MagickSizeType
3627     length;
3628 
3629   size_t
3630     global_work_size[2],
3631     local_work_size[2];
3632 
3633   unsigned int
3634     i,
3635     imageHeight,
3636     imageWidth,
3637     matte;
3638 
3639   void
3640     *filteredPixels,
3641     *hostPtr;
3642 
3643   assert(image != (Image *) NULL);
3644   assert(image->signature == MagickCoreSignature);
3645   if (image->debug != MagickFalse)
3646     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3647 
3648   outputReady = MagickFalse;
3649   filteredImage = NULL;
3650   filteredImage_view = NULL;
3651   imageBuffer = NULL;
3652   filteredImageBuffer = NULL;
3653   imageKernelBuffer = NULL;
3654   motionBlurKernel = NULL;
3655   queue = NULL;
3656 
3657   device = RequestOpenCLDevice(clEnv);
3658 
3659   /* Create and initialize OpenCL buffers. */
3660 
3661   image_view=AcquireAuthenticCacheView(image,exception);
3662   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,
3663     image->rows,exception);
3664   if (inputPixels == (const void *) NULL)
3665   {
3666     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3667       "UnableToReadPixelCache.","`%s'",image->filename);
3668     goto cleanup;
3669   }
3670 
3671   /*
3672     If the host pointer is aligned to the size of CLPixelPacket, then use
3673     the host buffer directly from the GPU; otherwise, create a buffer on
3674     the GPU and copy the data over
3675   */
3676   if (ALIGNED(inputPixels,CLPixelPacket))
3677   {
3678     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3679   }
3680   else
3681   {
3682     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3683   }
3684   /*
3685     create a CL buffer from image pixel buffer
3686   */
3687   length = image->columns * image->rows;
3688   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3689     length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3690   if (clStatus != CL_SUCCESS)
3691   {
3692     (void) ThrowMagickException(exception, GetMagickModule(),
3693       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3694     goto cleanup;
3695   }
3696 
3697 
3698   filteredImage = CloneImage(image,image->columns,image->rows,
3699     MagickTrue,exception);
3700   assert(filteredImage != NULL);
3701   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3702   {
3703     (void) ThrowMagickException(exception, GetMagickModule(),
3704       ResourceLimitError, "CloneImage failed.", ".");
3705     goto cleanup;
3706   }
3707   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3708   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3709   if (filteredPixels == (void *) NULL)
3710   {
3711     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3712       "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3713     goto cleanup;
3714   }
3715 
3716   if (ALIGNED(filteredPixels,CLPixelPacket))
3717   {
3718     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3719     hostPtr = filteredPixels;
3720   }
3721   else
3722   {
3723     mem_flags = CL_MEM_WRITE_ONLY;
3724     hostPtr = NULL;
3725   }
3726   /*
3727     Create a CL buffer from image pixel buffer.
3728   */
3729   length = image->columns * image->rows;
3730   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3731     length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3732   if (clStatus != CL_SUCCESS)
3733   {
3734     (void) ThrowMagickException(exception, GetMagickModule(),
3735       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3736     goto cleanup;
3737   }
3738 
3739 
3740   imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3741     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3742     &clStatus);
3743   if (clStatus != CL_SUCCESS)
3744   {
3745     (void) ThrowMagickException(exception, GetMagickModule(),
3746       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3747     goto cleanup;
3748   }
3749 
3750   queue = AcquireOpenCLCommandQueue(device);
3751   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3752     CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3753   if (clStatus != CL_SUCCESS)
3754   {
3755     (void) ThrowMagickException(exception, GetMagickModule(),
3756       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3757     goto cleanup;
3758   }
3759   for (i = 0; i < width; i++)
3760   {
3761     kernelBufferPtr[i] = (float) kernel[i];
3762   }
3763   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3764     0, NULL, NULL);
3765  if (clStatus != CL_SUCCESS)
3766   {
3767     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3768       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3769     goto cleanup;
3770   }
3771 
3772   offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3773     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3774     &clStatus);
3775   if (clStatus != CL_SUCCESS)
3776   {
3777     (void) ThrowMagickException(exception, GetMagickModule(),
3778       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3779     goto cleanup;
3780   }
3781 
3782   offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3783     CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3784   if (clStatus != CL_SUCCESS)
3785   {
3786     (void) ThrowMagickException(exception, GetMagickModule(),
3787       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3788     goto cleanup;
3789   }
3790   for (i = 0; i < width; i++)
3791   {
3792     offsetBufferPtr[2*i] = (int)offset[i].x;
3793     offsetBufferPtr[2*i+1] = (int)offset[i].y;
3794   }
3795   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3796     NULL, NULL);
3797  if (clStatus != CL_SUCCESS)
3798   {
3799     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3800       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3801     goto cleanup;
3802   }
3803 
3804 
3805   /*
3806     Get the OpenCL kernel
3807   */
3808   motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3809   if (motionBlurKernel == NULL)
3810   {
3811     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3812       "AcquireOpenCLKernel failed.", ".");
3813     goto cleanup;
3814   }
3815 
3816   /*
3817     Set the kernel arguments.
3818   */
3819   i = 0;
3820   clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3821     (void *)&imageBuffer);
3822   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3823     (void *)&filteredImageBuffer);
3824   imageWidth = (unsigned int) image->columns;
3825   imageHeight = (unsigned int) image->rows;
3826   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3827     &imageWidth);
3828   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3829     &imageHeight);
3830   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3831     (void *)&imageKernelBuffer);
3832   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3833     &width);
3834   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3835     (void *)&offsetBuffer);
3836 
3837   GetPixelInfo(image,&bias);
3838   biasPixel.s[0] = bias.red;
3839   biasPixel.s[1] = bias.green;
3840   biasPixel.s[2] = bias.blue;
3841   biasPixel.s[3] = bias.alpha;
3842   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3843 
3844   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3845   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3846   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3847   if (clStatus != CL_SUCCESS)
3848   {
3849     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3850       "clEnv->library->clSetKernelArg failed.", ".");
3851     goto cleanup;
3852   }
3853 
3854   /*
3855     Launch the kernel.
3856   */
3857   local_work_size[0] = 16;
3858   local_work_size[1] = 16;
3859   global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3860                                 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3861   global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3862                                 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3863   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3864 	  global_work_size, local_work_size, 0, NULL, &event);
3865 
3866   if (clStatus != CL_SUCCESS)
3867   {
3868     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3869       "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3870     goto cleanup;
3871   }
3872   RecordProfileData(device,motionBlurKernel,event);
3873 
3874   if (ALIGNED(filteredPixels,CLPixelPacket))
3875   {
3876     length = image->columns * image->rows;
3877     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3878       CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3879       NULL, &clStatus);
3880   }
3881   else
3882   {
3883     length = image->columns * image->rows;
3884     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3885       length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3886   }
3887   if (clStatus != CL_SUCCESS)
3888   {
3889     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3890       "Reading output image from CL buffer failed.", ".");
3891     goto cleanup;
3892   }
3893   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3894 
3895 cleanup:
3896 
3897   image_view=DestroyCacheView(image_view);
3898   if (filteredImage_view != NULL)
3899     filteredImage_view=DestroyCacheView(filteredImage_view);
3900 
3901   if (filteredImageBuffer!=NULL)
3902     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3903   if (imageBuffer!=NULL)
3904     clEnv->library->clReleaseMemObject(imageBuffer);
3905   if (imageKernelBuffer!=NULL)
3906     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3907   if (motionBlurKernel!=NULL)
3908     ReleaseOpenCLKernel(motionBlurKernel);
3909   if (queue != NULL)
3910     ReleaseOpenCLCommandQueue(device,queue);
3911   if (device != NULL)
3912     ReleaseOpenCLDevice(device);
3913   if (outputReady == MagickFalse && filteredImage != NULL)
3914     filteredImage=DestroyImage(filteredImage);
3915 
3916   return(filteredImage);
3917 }
3918 
AccelerateMotionBlurImage(const Image * image,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3919 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3920   const double* kernel,const size_t width,const OffsetInfo *offset,
3921   ExceptionInfo *exception)
3922 {
3923   Image
3924     *filteredImage;
3925 
3926   MagickCLEnv
3927     clEnv;
3928 
3929   assert(image != NULL);
3930   assert(kernel != (double *) NULL);
3931   assert(offset != (OffsetInfo *) NULL);
3932   assert(exception != (ExceptionInfo *) NULL);
3933 
3934   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3935     return((Image *) NULL);
3936 
3937   clEnv=getOpenCLEnvironment(exception);
3938   if (clEnv == (MagickCLEnv) NULL)
3939     return((Image *) NULL);
3940 
3941   filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3942     exception);
3943   return(filteredImage);
3944 }
3945 
3946 /*
3947 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3948 %                                                                             %
3949 %                                                                             %
3950 %                                                                             %
3951 %     A c c e l e r a t e R e s i z e I m a g e                               %
3952 %                                                                             %
3953 %                                                                             %
3954 %                                                                             %
3955 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3956 */
3957 
resizeHorizontalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float xFactor,ExceptionInfo * exception)3958 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3959   cl_command_queue queue,const Image *image,Image *filteredImage,
3960   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3961   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3962   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3963   const float xFactor,ExceptionInfo *exception)
3964 {
3965   cl_kernel
3966     horizontalKernel;
3967 
3968   cl_int
3969     status;
3970 
3971   const unsigned int
3972     workgroupSize = 256;
3973 
3974   float
3975     resizeFilterScale,
3976     resizeFilterSupport,
3977     resizeFilterWindowSupport,
3978     resizeFilterBlur,
3979     scale,
3980     support;
3981 
3982   int
3983     cacheRangeStart,
3984     cacheRangeEnd,
3985     numCachedPixels,
3986     resizeFilterType,
3987     resizeWindowType;
3988 
3989   MagickBooleanType
3990     outputReady;
3991 
3992   size_t
3993     gammaAccumulatorLocalMemorySize,
3994     gsize[2],
3995     i,
3996     imageCacheLocalMemorySize,
3997     pixelAccumulatorLocalMemorySize,
3998     lsize[2],
3999     totalLocalMemorySize,
4000     weightAccumulatorLocalMemorySize;
4001 
4002   unsigned int
4003     chunkSize,
4004     pixelPerWorkgroup;
4005 
4006   horizontalKernel=NULL;
4007   outputReady=MagickFalse;
4008 
4009   /*
4010   Apply filter to resize vertically from image to resize image.
4011   */
4012   scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
4013   support=scale*GetResizeFilterSupport(resizeFilter);
4014   if (support < 0.5)
4015   {
4016     /*
4017     Support too small even for nearest neighbour: Reduce to point
4018     sampling.
4019     */
4020     support=(float) 0.5;
4021     scale=1.0;
4022   }
4023   scale=PerceptibleReciprocal(scale);
4024 
4025   if (resizedColumns < workgroupSize)
4026   {
4027     chunkSize=32;
4028     pixelPerWorkgroup=32;
4029   }
4030   else
4031   {
4032     chunkSize=workgroupSize;
4033     pixelPerWorkgroup=workgroupSize;
4034   }
4035 
4036 DisableMSCWarning(4127)
4037   while(1)
4038 RestoreMSCWarning
4039   {
4040     /* calculate the local memory size needed per workgroup */
4041     cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
4042     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
4043       MagickEpsilon)+support+0.5);
4044     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4045     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4046       number_channels;
4047     totalLocalMemorySize=imageCacheLocalMemorySize;
4048 
4049     /* local size for the pixel accumulator */
4050     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4051     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4052 
4053     /* local memory size for the weight accumulator */
4054     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4055     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4056 
4057     /* local memory size for the gamma accumulator */
4058     if ((number_channels == 4) || (number_channels == 2))
4059       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4060     else
4061       gammaAccumulatorLocalMemorySize=sizeof(float);
4062     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4063 
4064     if (totalLocalMemorySize <= device->local_memory_size)
4065       break;
4066     else
4067     {
4068       pixelPerWorkgroup=pixelPerWorkgroup/2;
4069       chunkSize=chunkSize/2;
4070       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4071       {
4072         /* quit, fallback to CPU */
4073         goto cleanup;
4074       }
4075     }
4076   }
4077 
4078   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4079   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4080 
4081   horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4082   if (horizontalKernel == (cl_kernel) NULL)
4083   {
4084     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4085       ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4086     goto cleanup;
4087   }
4088 
4089   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4090   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4091   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4092   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4093 
4094   i=0;
4095   status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4096   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4097   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4098   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4099   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4100   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4101   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4102   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4103   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4104   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4105   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4106   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4107   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4108   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4109   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4110   status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4111   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4112   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4113   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4114   status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4115   status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4116   status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4117 
4118   if (status != CL_SUCCESS)
4119   {
4120     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4121       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4122     goto cleanup;
4123   }
4124 
4125   gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4126     workgroupSize;
4127   gsize[1]=resizedRows;
4128   lsize[0]=workgroupSize;
4129   lsize[1]=1;
4130   outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4131     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4132     exception);
4133 
4134 cleanup:
4135 
4136   if (horizontalKernel != (cl_kernel) NULL)
4137     ReleaseOpenCLKernel(horizontalKernel);
4138 
4139   return(outputReady);
4140 }
4141 
resizeVerticalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float yFactor,ExceptionInfo * exception)4142 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4143   cl_command_queue queue,const Image *image,Image * filteredImage,
4144   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4145   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4146   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4147   const float yFactor,ExceptionInfo *exception)
4148 {
4149   cl_kernel
4150     verticalKernel;
4151 
4152   cl_int
4153     status;
4154 
4155   const unsigned int
4156     workgroupSize = 256;
4157 
4158   float
4159     resizeFilterScale,
4160     resizeFilterSupport,
4161     resizeFilterWindowSupport,
4162     resizeFilterBlur,
4163     scale,
4164     support;
4165 
4166   int
4167     cacheRangeStart,
4168     cacheRangeEnd,
4169     numCachedPixels,
4170     resizeFilterType,
4171     resizeWindowType;
4172 
4173   MagickBooleanType
4174     outputReady;
4175 
4176   size_t
4177     gammaAccumulatorLocalMemorySize,
4178     gsize[2],
4179     i,
4180     imageCacheLocalMemorySize,
4181     pixelAccumulatorLocalMemorySize,
4182     lsize[2],
4183     totalLocalMemorySize,
4184     weightAccumulatorLocalMemorySize;
4185 
4186   unsigned int
4187     chunkSize,
4188     pixelPerWorkgroup;
4189 
4190   verticalKernel=NULL;
4191   outputReady=MagickFalse;
4192 
4193   /*
4194   Apply filter to resize vertically from image to resize image.
4195   */
4196   scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4197   support=scale*GetResizeFilterSupport(resizeFilter);
4198   if (support < 0.5)
4199   {
4200     /*
4201     Support too small even for nearest neighbour: Reduce to point
4202     sampling.
4203     */
4204     support=(float) 0.5;
4205     scale=1.0;
4206   }
4207   scale=PerceptibleReciprocal(scale);
4208 
4209   if (resizedRows < workgroupSize)
4210   {
4211     chunkSize=32;
4212     pixelPerWorkgroup=32;
4213   }
4214   else
4215   {
4216     chunkSize=workgroupSize;
4217     pixelPerWorkgroup=workgroupSize;
4218   }
4219 
4220 DisableMSCWarning(4127)
4221   while(1)
4222 RestoreMSCWarning
4223   {
4224     /* calculate the local memory size needed per workgroup */
4225     cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4226     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4227       MagickEpsilon)+support+0.5);
4228     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4229     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4230       number_channels;
4231     totalLocalMemorySize=imageCacheLocalMemorySize;
4232 
4233     /* local size for the pixel accumulator */
4234     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4235     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4236 
4237     /* local memory size for the weight accumulator */
4238     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4239     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4240 
4241     /* local memory size for the gamma accumulator */
4242     if ((number_channels == 4) || (number_channels == 2))
4243       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4244     else
4245       gammaAccumulatorLocalMemorySize=sizeof(float);
4246     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4247 
4248     if (totalLocalMemorySize <= device->local_memory_size)
4249       break;
4250     else
4251     {
4252       pixelPerWorkgroup=pixelPerWorkgroup/2;
4253       chunkSize=chunkSize/2;
4254       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4255       {
4256         /* quit, fallback to CPU */
4257         goto cleanup;
4258       }
4259     }
4260   }
4261 
4262   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4263   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4264 
4265   verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4266   if (verticalKernel == (cl_kernel) NULL)
4267   {
4268     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4269       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4270     goto cleanup;
4271   }
4272 
4273   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4274   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4275   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4276   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4277 
4278   i=0;
4279   status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4280   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4281   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4282   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4283   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4284   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4285   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4286   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4287   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4288   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4289   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4290   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4291   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4292   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4293   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4294   status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4295   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4296   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4297   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4298   status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4299   status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4300   status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4301 
4302   if (status != CL_SUCCESS)
4303   {
4304     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4305       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4306     goto cleanup;
4307   }
4308 
4309   gsize[0]=resizedColumns;
4310   gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4311     workgroupSize;
4312   lsize[0]=1;
4313   lsize[1]=workgroupSize;
4314   outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4315     gsize,lsize,image,filteredImage,MagickFalse,exception);
4316 
4317 cleanup:
4318 
4319   if (verticalKernel != (cl_kernel) NULL)
4320     ReleaseOpenCLKernel(verticalKernel);
4321 
4322   return(outputReady);
4323 }
4324 
ComputeResizeImage(const Image * image,MagickCLEnv clEnv,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4325 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4326   const size_t resizedColumns,const size_t resizedRows,
4327   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4328 {
4329   cl_command_queue
4330     queue;
4331 
4332   cl_mem
4333     cubicCoefficientsBuffer,
4334     filteredImageBuffer,
4335     imageBuffer,
4336     tempImageBuffer;
4337 
4338   cl_uint
4339     number_channels;
4340 
4341   const double
4342     *resizeFilterCoefficient;
4343 
4344   float
4345     coefficientBuffer[7],
4346     xFactor,
4347     yFactor;
4348 
4349   MagickBooleanType
4350     outputReady;
4351 
4352   MagickCLDevice
4353     device;
4354 
4355   MagickSizeType
4356     length;
4357 
4358   Image
4359     *filteredImage;
4360 
4361   size_t
4362     i;
4363 
4364   filteredImage=NULL;
4365   imageBuffer=NULL;
4366   filteredImageBuffer=NULL;
4367   tempImageBuffer=NULL;
4368   cubicCoefficientsBuffer=NULL;
4369   outputReady=MagickFalse;
4370 
4371   device=RequestOpenCLDevice(clEnv);
4372   queue=AcquireOpenCLCommandQueue(device);
4373   filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4374     exception);
4375   if (filteredImage == (Image *) NULL)
4376     goto cleanup;
4377   if (filteredImage->number_channels != image->number_channels)
4378     goto cleanup;
4379   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4380   if (imageBuffer == (cl_mem) NULL)
4381     goto cleanup;
4382   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4383   if (filteredImageBuffer == (cl_mem) NULL)
4384     goto cleanup;
4385 
4386   resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4387   for (i = 0; i < 7; i++)
4388     coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4389   cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4390     CL_MEM_READ_ONLY,sizeof(coefficientBuffer),&coefficientBuffer);
4391   if (cubicCoefficientsBuffer == (cl_mem) NULL)
4392   {
4393     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4394       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4395     goto cleanup;
4396   }
4397 
4398   number_channels=(cl_uint) image->number_channels;
4399   xFactor=(float) resizedColumns/(float) image->columns;
4400   yFactor=(float) resizedRows/(float) image->rows;
4401   if (xFactor > yFactor)
4402   {
4403     length=resizedColumns*image->rows*number_channels;
4404     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4405       sizeof(CLQuantum),(void *) NULL);
4406     if (tempImageBuffer == (cl_mem) NULL)
4407     {
4408       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4409         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4410       goto cleanup;
4411     }
4412 
4413     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4414       imageBuffer,number_channels,(cl_uint) image->columns,
4415       (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4416       (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4417       exception);
4418     if (outputReady == MagickFalse)
4419       goto cleanup;
4420 
4421     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4422       tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4423       (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4424       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4425       exception);
4426     if (outputReady == MagickFalse)
4427       goto cleanup;
4428   }
4429   else
4430   {
4431     length=image->columns*resizedRows*number_channels;
4432     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4433       sizeof(CLQuantum),(void *) NULL);
4434     if (tempImageBuffer == (cl_mem) NULL)
4435     {
4436       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4437         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4438       goto cleanup;
4439     }
4440 
4441     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4442       imageBuffer,number_channels,(cl_uint) image->columns,
4443       (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4444       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4445       exception);
4446     if (outputReady == MagickFalse)
4447       goto cleanup;
4448 
4449     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4450       tempImageBuffer,number_channels,(cl_uint) image->columns,
4451       (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4452       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4453       exception);
4454     if (outputReady == MagickFalse)
4455       goto cleanup;
4456   }
4457 
4458 cleanup:
4459 
4460   if (imageBuffer != (cl_mem) NULL)
4461     ReleaseOpenCLMemObject(imageBuffer);
4462   if (filteredImageBuffer != (cl_mem) NULL)
4463     ReleaseOpenCLMemObject(filteredImageBuffer);
4464   if (tempImageBuffer != (cl_mem) NULL)
4465     ReleaseOpenCLMemObject(tempImageBuffer);
4466   if (cubicCoefficientsBuffer != (cl_mem) NULL)
4467     ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4468   if (queue != (cl_command_queue) NULL)
4469     ReleaseOpenCLCommandQueue(device,queue);
4470   if (device != (MagickCLDevice) NULL)
4471     ReleaseOpenCLDevice(device);
4472   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4473     filteredImage=DestroyImage(filteredImage);
4474 
4475   return(filteredImage);
4476 }
4477 
gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)4478 static MagickBooleanType gpuSupportedResizeWeighting(
4479   ResizeWeightingFunctionType f)
4480 {
4481   unsigned int
4482     i;
4483 
4484   for (i = 0; ;i++)
4485   {
4486     if (supportedResizeWeighting[i] == LastWeightingFunction)
4487       break;
4488     if (supportedResizeWeighting[i] == f)
4489       return(MagickTrue);
4490   }
4491   return(MagickFalse);
4492 }
4493 
AccelerateResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4494 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4495   const size_t resizedColumns,const size_t resizedRows,
4496   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4497 {
4498   Image
4499     *filteredImage;
4500 
4501   MagickCLEnv
4502     clEnv;
4503 
4504   assert(image != NULL);
4505   assert(exception != (ExceptionInfo *) NULL);
4506 
4507   if (checkAccelerateCondition(image) == MagickFalse)
4508     return((Image *) NULL);
4509 
4510   if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4511          resizeFilter)) == MagickFalse) ||
4512       (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4513          resizeFilter)) == MagickFalse))
4514     return((Image *) NULL);
4515 
4516   clEnv=getOpenCLEnvironment(exception);
4517   if (clEnv == (MagickCLEnv) NULL)
4518     return((Image *) NULL);
4519 
4520   filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4521     resizeFilter,exception);
4522   return(filteredImage);
4523 }
4524 
4525 /*
4526 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4527 %                                                                             %
4528 %                                                                             %
4529 %                                                                             %
4530 %     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
4531 %                                                                             %
4532 %                                                                             %
4533 %                                                                             %
4534 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4535 */
4536 
ComputeRotationalBlurImage(const Image * image,MagickCLEnv clEnv,const double angle,ExceptionInfo * exception)4537 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4538   const double angle,ExceptionInfo *exception)
4539 {
4540   cl_command_queue
4541     queue;
4542 
4543   cl_float2
4544     blurCenter;
4545 
4546   cl_int
4547     status;
4548 
4549   cl_mem
4550     cosThetaBuffer,
4551     filteredImageBuffer,
4552     imageBuffer,
4553     sinThetaBuffer;
4554 
4555   cl_kernel
4556     rotationalBlurKernel;
4557 
4558   cl_uint
4559     cossin_theta_size,
4560     number_channels;
4561 
4562   float
4563     blurRadius,
4564     *cosThetaPtr,
4565     offset,
4566     *sinThetaPtr,
4567     theta;
4568 
4569   Image
4570     *filteredImage;
4571 
4572   MagickBooleanType
4573     outputReady;
4574 
4575   MagickCLDevice
4576     device;
4577 
4578   size_t
4579     gsize[2],
4580     i;
4581 
4582   assert(image != (Image *) NULL);
4583   assert(image->signature == MagickCoreSignature);
4584   if (image->debug != MagickFalse)
4585     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4586 
4587   filteredImage=NULL;
4588   imageBuffer=NULL;
4589   filteredImageBuffer=NULL;
4590   sinThetaBuffer=NULL;
4591   cosThetaBuffer=NULL;
4592   rotationalBlurKernel=NULL;
4593   outputReady=MagickFalse;
4594 
4595   device=RequestOpenCLDevice(clEnv);
4596   queue=AcquireOpenCLCommandQueue(device);
4597   filteredImage=cloneImage(image,exception);
4598   if (filteredImage == (Image *) NULL)
4599     goto cleanup;
4600   if (filteredImage->number_channels != image->number_channels)
4601     goto cleanup;
4602   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4603   if (imageBuffer == (cl_mem) NULL)
4604     goto cleanup;
4605   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4606   if (filteredImageBuffer == (cl_mem) NULL)
4607     goto cleanup;
4608 
4609   blurCenter.x=(float) (image->columns-1)/2.0;
4610   blurCenter.y=(float) (image->rows-1)/2.0;
4611   blurRadius=hypot(blurCenter.x,blurCenter.y);
4612   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4613     (double) blurRadius)+2UL);
4614 
4615   cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4616   if (cosThetaPtr == (float *) NULL)
4617     goto cleanup;
4618   sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4619   if (sinThetaPtr == (float *) NULL)
4620   {
4621     cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4622     goto cleanup;
4623   }
4624 
4625   theta=DegreesToRadians(angle)/(double) (cossin_theta_size-1);
4626   offset=theta*(float) (cossin_theta_size-1)/2.0;
4627   for (i=0; i < (ssize_t) cossin_theta_size; i++)
4628   {
4629     cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4630     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4631   }
4632 
4633   sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4634     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4635   sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4636   cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4637     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4638   cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4639   if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4640   {
4641     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4642       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4643     goto cleanup;
4644   }
4645 
4646   rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4647   if (rotationalBlurKernel == (cl_kernel) NULL)
4648   {
4649     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4650       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4651     goto cleanup;
4652   }
4653 
4654   number_channels=(cl_uint) image->number_channels;
4655 
4656   i=0;
4657   status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4658   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4659   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4660   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4661   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4662   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4663   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4664   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4665   if (status != CL_SUCCESS)
4666   {
4667     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4668       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4669     goto cleanup;
4670   }
4671 
4672   gsize[0]=image->columns;
4673   gsize[1]=image->rows;
4674   outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4675     (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4676     MagickFalse,exception);
4677 
4678 cleanup:
4679 
4680   if (imageBuffer != (cl_mem) NULL)
4681     ReleaseOpenCLMemObject(imageBuffer);
4682   if (filteredImageBuffer != (cl_mem) NULL)
4683     ReleaseOpenCLMemObject(filteredImageBuffer);
4684   if (sinThetaBuffer != (cl_mem) NULL)
4685     ReleaseOpenCLMemObject(sinThetaBuffer);
4686   if (cosThetaBuffer != (cl_mem) NULL)
4687     ReleaseOpenCLMemObject(cosThetaBuffer);
4688   if (rotationalBlurKernel != (cl_kernel) NULL)
4689     ReleaseOpenCLKernel(rotationalBlurKernel);
4690   if (queue != (cl_command_queue) NULL)
4691     ReleaseOpenCLCommandQueue(device,queue);
4692   if (device != (MagickCLDevice) NULL)
4693     ReleaseOpenCLDevice(device);
4694   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4695     filteredImage=DestroyImage(filteredImage);
4696 
4697   return(filteredImage);
4698 }
4699 
AccelerateRotationalBlurImage(const Image * image,const double angle,ExceptionInfo * exception)4700 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4701   const double angle,ExceptionInfo *exception)
4702 {
4703   Image
4704     *filteredImage;
4705 
4706   MagickCLEnv
4707     clEnv;
4708 
4709   assert(image != NULL);
4710   assert(exception != (ExceptionInfo *) NULL);
4711   if (image->debug != MagickFalse)
4712     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
4713 
4714   if (checkAccelerateCondition(image) == MagickFalse)
4715     return((Image *) NULL);
4716 
4717   clEnv=getOpenCLEnvironment(exception);
4718   if (clEnv == (MagickCLEnv) NULL)
4719     return((Image *) NULL);
4720 
4721   filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4722   return filteredImage;
4723 }
4724 
4725 /*
4726 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4727 %                                                                             %
4728 %                                                                             %
4729 %                                                                             %
4730 %     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
4731 %                                                                             %
4732 %                                                                             %
4733 %                                                                             %
4734 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4735 */
4736 
ComputeUnsharpMaskImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4737 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4738   const double radius,const double sigma,const double gain,
4739   const double threshold,ExceptionInfo *exception)
4740 {
4741   cl_command_queue
4742     queue;
4743 
4744   cl_int
4745     status;
4746 
4747   cl_kernel
4748     blurRowKernel,
4749     unsharpMaskBlurColumnKernel;
4750 
4751   cl_mem
4752     filteredImageBuffer,
4753     imageBuffer,
4754     imageKernelBuffer,
4755     tempImageBuffer;
4756 
4757   cl_uint
4758     imageColumns,
4759     imageRows,
4760     kernelWidth,
4761     number_channels;
4762 
4763   float
4764     fGain,
4765     fThreshold;
4766 
4767   Image
4768     *filteredImage;
4769 
4770   int
4771     chunkSize;
4772 
4773   MagickBooleanType
4774     outputReady;
4775 
4776   MagickCLDevice
4777     device;
4778 
4779   MagickSizeType
4780     length;
4781 
4782   size_t
4783     gsize[2],
4784     i,
4785     lsize[2];
4786 
4787   filteredImage=NULL;
4788   imageBuffer=NULL;
4789   filteredImageBuffer=NULL;
4790   tempImageBuffer=NULL;
4791   imageKernelBuffer=NULL;
4792   blurRowKernel=NULL;
4793   unsharpMaskBlurColumnKernel=NULL;
4794   outputReady=MagickFalse;
4795 
4796   device=RequestOpenCLDevice(clEnv);
4797   queue=AcquireOpenCLCommandQueue(device);
4798   filteredImage=cloneImage(image,exception);
4799   if (filteredImage == (Image *) NULL)
4800     goto cleanup;
4801   if (filteredImage->number_channels != image->number_channels)
4802     goto cleanup;
4803   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4804   if (imageBuffer == (cl_mem) NULL)
4805     goto cleanup;
4806   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4807   if (filteredImageBuffer == (cl_mem) NULL)
4808     goto cleanup;
4809 
4810   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4811     exception);
4812 
4813   length=image->columns*image->rows;
4814   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4815     sizeof(cl_float4),NULL);
4816   if (tempImageBuffer == (cl_mem) NULL)
4817   {
4818     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4819       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4820     goto cleanup;
4821   }
4822 
4823   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4824   if (blurRowKernel == (cl_kernel) NULL)
4825   {
4826     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4827       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4828     goto cleanup;
4829   }
4830 
4831   unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4832     "UnsharpMaskBlurColumn");
4833   if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4834   {
4835     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4836       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4837     goto cleanup;
4838   }
4839 
4840   number_channels=(cl_uint) image->number_channels;
4841   imageColumns=(cl_uint) image->columns;
4842   imageRows=(cl_uint) image->rows;
4843 
4844   chunkSize = 256;
4845 
4846   i=0;
4847   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4848   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4849   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4850   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4851   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4852   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4853   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4854   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4855   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4856   if (status != CL_SUCCESS)
4857   {
4858     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4859       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4860     goto cleanup;
4861   }
4862 
4863   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4864   gsize[1]=image->rows;
4865   lsize[0]=chunkSize;
4866   lsize[1]=1;
4867   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4868     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4869     exception);
4870 
4871   chunkSize=256;
4872   fGain=(float) gain;
4873   fThreshold=(float) threshold;
4874 
4875   i=0;
4876   status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4877   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4878   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4879   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4880   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4881   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4882   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4883   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4884   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4885   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4886   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4887   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4888   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4889   if (status != CL_SUCCESS)
4890   {
4891     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4892       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4893     goto cleanup;
4894   }
4895 
4896   gsize[0]=image->columns;
4897   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4898   lsize[0]=1;
4899   lsize[1]=chunkSize;
4900   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4901     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4902     exception);
4903 
4904 cleanup:
4905 
4906   if (imageBuffer != (cl_mem) NULL)
4907     ReleaseOpenCLMemObject(imageBuffer);
4908   if (filteredImageBuffer != (cl_mem) NULL)
4909     ReleaseOpenCLMemObject(filteredImageBuffer);
4910   if (tempImageBuffer != (cl_mem) NULL)
4911     ReleaseOpenCLMemObject(tempImageBuffer);
4912   if (imageKernelBuffer != (cl_mem) NULL)
4913     ReleaseOpenCLMemObject(imageKernelBuffer);
4914   if (blurRowKernel != (cl_kernel) NULL)
4915     ReleaseOpenCLKernel(blurRowKernel);
4916   if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4917     ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4918   if (queue != (cl_command_queue) NULL)
4919     ReleaseOpenCLCommandQueue(device,queue);
4920   if (device != (MagickCLDevice) NULL)
4921     ReleaseOpenCLDevice(device);
4922   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4923     filteredImage=DestroyImage(filteredImage);
4924 
4925   return(filteredImage);
4926 }
4927 
ComputeUnsharpMaskImageSingle(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4928 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4929   MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4930   const double threshold,ExceptionInfo *exception)
4931 {
4932   cl_command_queue
4933     queue;
4934 
4935   cl_int
4936     status;
4937 
4938   cl_kernel
4939     unsharpMaskKernel;
4940 
4941   cl_mem
4942     filteredImageBuffer,
4943     imageBuffer,
4944     imageKernelBuffer;
4945 
4946   cl_uint
4947     imageColumns,
4948     imageRows,
4949     kernelWidth,
4950     number_channels;
4951 
4952   float
4953     fGain,
4954     fThreshold;
4955 
4956   Image
4957     *filteredImage;
4958 
4959   MagickBooleanType
4960     outputReady;
4961 
4962   MagickCLDevice
4963     device;
4964 
4965   size_t
4966     gsize[2],
4967     i,
4968     lsize[2];
4969 
4970   filteredImage=NULL;
4971   imageBuffer=NULL;
4972   filteredImageBuffer=NULL;
4973   imageKernelBuffer=NULL;
4974   unsharpMaskKernel=NULL;
4975   outputReady=MagickFalse;
4976 
4977   device=RequestOpenCLDevice(clEnv);
4978   queue=AcquireOpenCLCommandQueue(device);
4979   filteredImage=cloneImage(image,exception);
4980   if (filteredImage == (Image *) NULL)
4981     goto cleanup;
4982   if (filteredImage->number_channels != image->number_channels)
4983     goto cleanup;
4984   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4985   if (imageBuffer == (cl_mem) NULL)
4986     goto cleanup;
4987   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4988   if (filteredImageBuffer == (cl_mem) NULL)
4989     goto cleanup;
4990 
4991   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4992     exception);
4993 
4994   unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4995   if (unsharpMaskKernel == NULL)
4996   {
4997     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4998       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4999     goto cleanup;
5000   }
5001 
5002   imageColumns=(cl_uint) image->columns;
5003   imageRows=(cl_uint) image->rows;
5004   number_channels=(cl_uint) image->number_channels;
5005   fGain=(float) gain;
5006   fThreshold=(float) threshold;
5007 
5008   i=0;
5009   status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5010   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5011   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
5012   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
5013   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
5014   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
5015   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
5016   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
5017   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
5018   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
5019   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5020   if (status != CL_SUCCESS)
5021   {
5022     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5023       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5024     goto cleanup;
5025   }
5026 
5027   gsize[0]=((image->columns + 7) / 8)*8;
5028   gsize[1]=((image->rows + 31) / 32)*32;
5029   lsize[0]=8;
5030   lsize[1]=32;
5031   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
5032     gsize,lsize,image,filteredImage,MagickFalse,exception);
5033 
5034 cleanup:
5035 
5036   if (imageBuffer != (cl_mem) NULL)
5037     ReleaseOpenCLMemObject(imageBuffer);
5038   if (filteredImageBuffer != (cl_mem) NULL)
5039     ReleaseOpenCLMemObject(filteredImageBuffer);
5040   if (imageKernelBuffer != (cl_mem) NULL)
5041     ReleaseOpenCLMemObject(imageKernelBuffer);
5042   if (unsharpMaskKernel != (cl_kernel) NULL)
5043     ReleaseOpenCLKernel(unsharpMaskKernel);
5044   if (queue != (cl_command_queue) NULL)
5045     ReleaseOpenCLCommandQueue(device,queue);
5046   if (device != (MagickCLDevice) NULL)
5047     ReleaseOpenCLDevice(device);
5048   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5049     filteredImage=DestroyImage(filteredImage);
5050 
5051   return(filteredImage);
5052 }
5053 
AccelerateUnsharpMaskImage(const Image * image,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)5054 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5055   const double radius,const double sigma,const double gain,
5056   const double threshold,ExceptionInfo *exception)
5057 {
5058   Image
5059     *filteredImage;
5060 
5061   MagickCLEnv
5062     clEnv;
5063 
5064   assert(image != NULL);
5065   assert(exception != (ExceptionInfo *) NULL);
5066 
5067   if (checkAccelerateCondition(image) == MagickFalse)
5068     return((Image *) NULL);
5069 
5070   clEnv=getOpenCLEnvironment(exception);
5071   if (clEnv == (MagickCLEnv) NULL)
5072     return((Image *) NULL);
5073 
5074   if (radius < 12.1)
5075     filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5076       threshold,exception);
5077   else
5078     filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5079       threshold,exception);
5080   return(filteredImage);
5081 }
5082 
ComputeWaveletDenoiseImage(const Image * image,MagickCLEnv clEnv,const double threshold,ExceptionInfo * exception)5083 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5084   const double threshold,ExceptionInfo *exception)
5085 {
5086   cl_command_queue
5087     queue;
5088 
5089   const cl_int
5090     PASSES=5;
5091 
5092   const int
5093     TILESIZE=64,
5094     PAD=1<<(PASSES-1),
5095     SIZE=TILESIZE-2*PAD;
5096 
5097   cl_float
5098     thresh;
5099 
5100   cl_int
5101     status;
5102 
5103   cl_kernel
5104     denoiseKernel;
5105 
5106   cl_mem
5107     filteredImageBuffer,
5108     imageBuffer;
5109 
5110   cl_uint
5111     number_channels,
5112     width,
5113     height,
5114     max_channels;
5115 
5116   Image
5117     *filteredImage;
5118 
5119   MagickBooleanType
5120     outputReady;
5121 
5122   MagickCLDevice
5123     device;
5124 
5125   size_t
5126     goffset[2],
5127     gsize[2],
5128     i,
5129     lsize[2],
5130     passes,
5131     x;
5132 
5133   filteredImage=NULL;
5134   imageBuffer=NULL;
5135   filteredImageBuffer=NULL;
5136   denoiseKernel=NULL;
5137   queue=NULL;
5138   outputReady=MagickFalse;
5139 
5140   device=RequestOpenCLDevice(clEnv);
5141   /* Work around an issue on low end Intel devices */
5142   if (strcmp("Intel(R) HD Graphics",device->name) == 0)
5143     goto cleanup;
5144   queue=AcquireOpenCLCommandQueue(device);
5145   filteredImage=CloneImage(image,0,0,MagickTrue,
5146     exception);
5147   if (filteredImage == (Image *) NULL)
5148     goto cleanup;
5149   if (filteredImage->number_channels != image->number_channels)
5150     goto cleanup;
5151   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5152   if (imageBuffer == (cl_mem) NULL)
5153     goto cleanup;
5154   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5155   if (filteredImageBuffer == (cl_mem) NULL)
5156     goto cleanup;
5157 
5158   denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5159   if (denoiseKernel == (cl_kernel) NULL)
5160   {
5161     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5162       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5163     goto cleanup;
5164   }
5165 
5166   number_channels=(cl_uint)image->number_channels;
5167   width=(cl_uint)image->columns;
5168   height=(cl_uint)image->rows;
5169   max_channels=number_channels;
5170   if ((max_channels == 4) || (max_channels == 2))
5171     max_channels=max_channels-1;
5172   thresh=threshold;
5173   passes=(((1.0f*image->columns)*image->rows)+1999999.0f)/2000000.0f;
5174   passes=(passes < 1) ? 1 : passes;
5175 
5176   i=0;
5177   status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5178   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5179   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5180   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5181   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5182   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5183   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5184   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5185   if (status != CL_SUCCESS)
5186     {
5187       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5188         ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5189       goto cleanup;
5190     }
5191 
5192   for (x = 0; x < passes; ++x)
5193   {
5194     gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5195     gsize[1]=((((height+(SIZE-1))/SIZE)+passes-1)/passes)*4;
5196     lsize[0]=TILESIZE;
5197     lsize[1]=4;
5198     goffset[0]=0;
5199     goffset[1]=x*gsize[1];
5200 
5201     outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,goffset,gsize,lsize,
5202       image,filteredImage,MagickTrue,exception);
5203     if (outputReady == MagickFalse)
5204       break;
5205   }
5206 
5207 cleanup:
5208 
5209   if (imageBuffer != (cl_mem) NULL)
5210     ReleaseOpenCLMemObject(imageBuffer);
5211   if (filteredImageBuffer != (cl_mem) NULL)
5212     ReleaseOpenCLMemObject(filteredImageBuffer);
5213   if (denoiseKernel != (cl_kernel) NULL)
5214     ReleaseOpenCLKernel(denoiseKernel);
5215   if (queue != (cl_command_queue) NULL)
5216     ReleaseOpenCLCommandQueue(device,queue);
5217   if (device != (MagickCLDevice) NULL)
5218     ReleaseOpenCLDevice(device);
5219   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5220     filteredImage=DestroyImage(filteredImage);
5221 
5222   return(filteredImage);
5223 }
5224 
AccelerateWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5225 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5226   const double threshold,ExceptionInfo *exception)
5227 {
5228   Image
5229     *filteredImage;
5230 
5231   MagickCLEnv
5232     clEnv;
5233 
5234   assert(image != NULL);
5235   assert(exception != (ExceptionInfo *)NULL);
5236 
5237   if (checkAccelerateCondition(image) == MagickFalse)
5238     return((Image *) NULL);
5239 
5240   clEnv=getOpenCLEnvironment(exception);
5241   if (clEnv == (MagickCLEnv) NULL)
5242     return((Image *) NULL);
5243 
5244   filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5245 
5246   return(filteredImage);
5247 }
5248 #endif /* MAGICKCORE_OPENCL_SUPPORT */
5249