1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7 %    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8 %    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9 %    A   A   C       C      E      L      E      R R    A   A    T    E       %
10 %    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11 %                                                                             %
12 %                                                                             %
13 %                       MagickCore Acceleration Methods                       %
14 %                                                                             %
15 %                              Software Design                                %
16 %                                  Cristy                                     %
17 %                               SiuChi Chan                                   %
18 %                              Guansong Zhang                                 %
19 %                               January 2010                                  %
20 %                               Dirk Lemstra                                  %
21 %                                April 2016                                   %
22 %                                                                             %
23 %                                                                             %
24 %  Copyright 1999-2016 ImageMagick Studio LLC, a non-profit organization      %
25 %  dedicated to making software imaging solutions freely available.           %
26 %                                                                             %
27 %  You may not use this file except in compliance with the License.  You may  %
28 %  obtain a copy of the License at                                            %
29 %                                                                             %
30 %    http://www.imagemagick.org/script/license.php                            %
31 %                                                                             %
32 %  Unless required by applicable law or agreed to in writing, software        %
33 %  distributed under the License is distributed on an "AS IS" BASIS,          %
34 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
35 %  See the License for the specific language governing permissions and        %
36 %  limitations under the License.                                             %
37 %                                                                             %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40 
41 /*
42 Include declarations.
43 */
44 #include "MagickCore/studio.h"
45 #include "MagickCore/accelerate-private.h"
46 #include "MagickCore/accelerate-kernels-private.h"
47 #include "MagickCore/artifact.h"
48 #include "MagickCore/cache.h"
49 #include "MagickCore/cache-private.h"
50 #include "MagickCore/cache-view.h"
51 #include "MagickCore/color-private.h"
52 #include "MagickCore/delegate-private.h"
53 #include "MagickCore/enhance.h"
54 #include "MagickCore/exception.h"
55 #include "MagickCore/exception-private.h"
56 #include "MagickCore/gem.h"
57 #include "MagickCore/image.h"
58 #include "MagickCore/image-private.h"
59 #include "MagickCore/linked-list.h"
60 #include "MagickCore/list.h"
61 #include "MagickCore/memory_.h"
62 #include "MagickCore/monitor-private.h"
63 #include "MagickCore/opencl.h"
64 #include "MagickCore/opencl-private.h"
65 #include "MagickCore/option.h"
66 #include "MagickCore/pixel-accessor.h"
67 #include "MagickCore/pixel-private.h"
68 #include "MagickCore/prepress.h"
69 #include "MagickCore/quantize.h"
70 #include "MagickCore/quantum-private.h"
71 #include "MagickCore/random_.h"
72 #include "MagickCore/random-private.h"
73 #include "MagickCore/registry.h"
74 #include "MagickCore/resize.h"
75 #include "MagickCore/resize-private.h"
76 #include "MagickCore/semaphore.h"
77 #include "MagickCore/splay-tree.h"
78 #include "MagickCore/statistic.h"
79 #include "MagickCore/string_.h"
80 #include "MagickCore/string-private.h"
81 #include "MagickCore/token.h"
82 
83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85 
86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
87 
88 /*
89   Define declarations.
90 */
91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92 
93 /*
94   Static declarations.
95 */
96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97 {
98   BoxWeightingFunction,
99   TriangleWeightingFunction,
100   HannWeightingFunction,
101   HammingWeightingFunction,
102   BlackmanWeightingFunction,
103   CubicBCWeightingFunction,
104   SincWeightingFunction,
105   SincFastWeightingFunction,
106   LastWeightingFunction
107 };
108 
109 /*
110   Helper functions.
111 */
checkAccelerateCondition(const Image * image)112 static MagickBooleanType checkAccelerateCondition(const Image* image)
113 {
114   /* check if the image's colorspace is supported */
115   if (image->colorspace != RGBColorspace &&
116       image->colorspace != sRGBColorspace &&
117       image->colorspace != GRAYColorspace)
118     return(MagickFalse);
119 
120   /* check if the virtual pixel method is compatible with the OpenCL implementation */
121   if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
122       (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
123     return(MagickFalse);
124 
125   /* check if the image has read / write mask */
126   if (image->read_mask != MagickFalse || image->write_mask != MagickFalse)
127     return(MagickFalse);
128 
129   if (image->number_channels > 4)
130     return(MagickFalse);
131 
132   /* check if pixel order is R */
133   if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
134     return(MagickFalse);
135 
136   if (image->number_channels == 1)
137     return(MagickTrue);
138 
139   /* check if pixel order is RA */
140   if ((image->number_channels == 2) &&
141       (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
142     return(MagickTrue);
143 
144   if (image->number_channels == 2)
145     return(MagickFalse);
146 
147   /* check if pixel order is RGB */
148   if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
149       (GetPixelChannelOffset(image,BluePixelChannel) != 2))
150     return(MagickFalse);
151 
152   if (image->number_channels == 3)
153     return(MagickTrue);
154 
155   /* check if pixel order is RGBA */
156   if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
157     return(MagickFalse);
158 
159   return(MagickTrue);
160 }
161 
checkAccelerateConditionRGBA(const Image * image)162 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
163 {
164   if (checkAccelerateCondition(image) == MagickFalse)
165     return(MagickFalse);
166 
167   /* the order will be RGBA if the image has 4 channels */
168   if (image->number_channels != 4)
169     return(MagickFalse);
170 
171   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
172       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
173       (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
174       (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
175     return(MagickFalse);
176 
177   return(MagickTrue);
178 }
179 
checkPixelIntensity(const Image * image,const PixelIntensityMethod method)180 static MagickBooleanType checkPixelIntensity(const Image *image,
181   const PixelIntensityMethod method)
182 {
183   /* EncodePixelGamma and DecodePixelGamma are not supported */
184   if ((method == Rec601LumaPixelIntensityMethod) ||
185       (method == Rec709LumaPixelIntensityMethod))
186     {
187       if (image->colorspace == RGBColorspace)
188         return(MagickFalse);
189     }
190 
191   if ((method == Rec601LuminancePixelIntensityMethod) ||
192       (method == Rec709LuminancePixelIntensityMethod))
193     {
194       if (image->colorspace == sRGBColorspace)
195         return(MagickFalse);
196     }
197 
198   return(MagickTrue);
199 }
200 
checkHistogramCondition(const Image * image,const PixelIntensityMethod method)201 static MagickBooleanType checkHistogramCondition(const Image *image,
202   const PixelIntensityMethod method)
203 {
204   /* ensure this is the only pass get in for now. */
205   if ((image->channel_mask & SyncChannels) == 0)
206     return MagickFalse;
207 
208   return(checkPixelIntensity(image,method));
209 }
210 
getOpenCLEnvironment(ExceptionInfo * exception)211 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
212 {
213   MagickCLEnv
214     clEnv;
215 
216   clEnv=GetCurrentOpenCLEnv();
217   if (clEnv == (MagickCLEnv) NULL)
218     return((MagickCLEnv) NULL);
219 
220   if (clEnv->enabled == MagickFalse)
221     return((MagickCLEnv) NULL);
222 
223   if (InitializeOpenCL(clEnv,exception) == MagickFalse)
224     return((MagickCLEnv) NULL);
225 
226   return(clEnv);
227 }
228 
cloneImage(const Image * image,ExceptionInfo * exception)229 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
230 {
231   Image
232     *clone;
233 
234   if (((image->channel_mask & RedChannel) != 0) &&
235       ((image->channel_mask & GreenChannel) != 0) &&
236       ((image->channel_mask & BlueChannel) != 0) &&
237       ((image->channel_mask & AlphaChannel) != 0))
238     clone=CloneImage(image,image->columns,image->rows,MagickTrue,exception);
239   else
240     {
241       clone=CloneImage(image,0,0,MagickTrue,exception);
242       if (clone != (Image *) NULL)
243         SyncImagePixelCache(clone,exception);
244     }
245   return(clone);
246 }
247 
248 /* pad the global workgroup size to the next multiple of
249    the local workgroup size */
padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize,const unsigned int localGroupSize)250 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
251   const unsigned int orgGlobalSize,const unsigned int localGroupSize)
252 {
253   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
254 }
255 
createKernelInfo(MagickCLDevice device,const double radius,const double sigma,cl_uint * width,ExceptionInfo * exception)256 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
257   const double sigma,cl_uint *width,ExceptionInfo *exception)
258 {
259   char
260     geometry[MagickPathExtent];
261 
262   cl_int
263     status;
264 
265   cl_mem
266     imageKernelBuffer;
267 
268   float
269     *kernelBufferPtr;
270 
271   KernelInfo
272     *kernel;
273 
274   ssize_t
275     i;
276 
277   (void) FormatLocaleString(geometry,MagickPathExtent,
278     "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
279   kernel=AcquireKernelInfo(geometry,exception);
280   if (kernel == (KernelInfo *) NULL)
281   {
282     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
283       ResourceLimitWarning,"AcquireKernelInfo failed.",".");
284     return((cl_mem) NULL);
285   }
286   kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
287     sizeof(*kernelBufferPtr));
288   for (i = 0; i < (ssize_t) kernel->width; i++)
289     kernelBufferPtr[i] = (float)kernel->values[i];
290   imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
291     CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
292   *width=kernel->width;
293   kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
294   kernel=DestroyKernelInfo(kernel);
295   if (imageKernelBuffer == (cl_mem) NULL)
296     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
297       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
298   return(imageKernelBuffer);
299 }
300 
LaunchHistogramKernel(MagickCLEnv clEnv,MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,Image * image,const ChannelType channel,ExceptionInfo * exception)301 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
302   MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
303   cl_mem histogramBuffer,Image *image,const ChannelType channel,
304   ExceptionInfo *exception)
305 {
306   MagickBooleanType
307     outputReady;
308 
309   cl_int
310     clStatus;
311 
312   cl_kernel
313     histogramKernel;
314 
315   cl_event
316     event;
317 
318   cl_uint
319     colorspace,
320     method;
321 
322   register ssize_t
323     i;
324 
325   size_t
326     global_work_size[2];
327 
328   histogramKernel = NULL;
329 
330   outputReady = MagickFalse;
331   colorspace = image->colorspace;
332   method = image->intensity;
333 
334   /* get the OpenCL kernel */
335   histogramKernel = AcquireOpenCLKernel(device,"Histogram");
336   if (histogramKernel == NULL)
337   {
338     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
339     goto cleanup;
340   }
341 
342   /* set the kernel arguments */
343   i = 0;
344   clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
345   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
346   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
347   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
348   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
349   if (clStatus != CL_SUCCESS)
350   {
351     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
352     goto cleanup;
353   }
354 
355   /* launch the kernel */
356   global_work_size[0] = image->columns;
357   global_work_size[1] = image->rows;
358 
359   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
360 
361   if (clStatus != CL_SUCCESS)
362   {
363     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
364     goto cleanup;
365   }
366   RecordProfileData(device,histogramKernel,event);
367 
368   outputReady = MagickTrue;
369 
370 cleanup:
371 
372   if (histogramKernel!=NULL)
373     ReleaseOpenCLKernel(histogramKernel);
374 
375   return(outputReady);
376 }
377 
378 /*
379 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
380 %                                                                             %
381 %                                                                             %
382 %                                                                             %
383 %     A c c e l e r a t e A d d N o i s e I m a g e                           %
384 %                                                                             %
385 %                                                                             %
386 %                                                                             %
387 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
388 */
389 
ComputeAddNoiseImage(const Image * image,MagickCLEnv clEnv,const NoiseType noise_type,ExceptionInfo * exception)390 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
391   const NoiseType noise_type,ExceptionInfo *exception)
392 {
393   cl_command_queue
394     queue;
395 
396   cl_float
397     attenuate;
398 
399   cl_int
400     status;
401 
402   cl_kernel
403     addNoiseKernel;
404 
405   cl_mem
406     filteredImageBuffer,
407     imageBuffer;
408 
409   cl_uint
410     bufferLength,
411     inputPixelCount,
412     number_channels,
413     numRandomNumberPerPixel,
414     pixelsPerWorkitem,
415     seed0,
416     seed1,
417     workItemCount;
418 
419   const char
420     *option;
421 
422   const unsigned long
423     *s;
424 
425   MagickBooleanType
426     outputReady;
427 
428   MagickCLDevice
429     device;
430 
431   Image
432     *filteredImage;
433 
434   RandomInfo
435     *randomInfo;
436 
437   size_t
438     gsize[1],
439     i,
440     lsize[1],
441     numRandPerChannel;
442 
443   filteredImage=NULL;
444   addNoiseKernel=NULL;
445   outputReady=MagickFalse;
446 
447   device=RequestOpenCLDevice(clEnv);
448   queue=AcquireOpenCLCommandQueue(device);
449   if (queue == (cl_command_queue) NULL)
450     goto cleanup;
451   filteredImage=cloneImage(image,exception);
452   if (filteredImage == (Image *) NULL)
453     goto cleanup;
454   if (filteredImage->number_channels != image->number_channels)
455     goto cleanup;
456   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
457   if (imageBuffer == (cl_mem) NULL)
458     goto cleanup;
459   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
460   if (filteredImageBuffer == (cl_mem) NULL)
461     goto cleanup;
462 
463   /* find out how many random numbers needed by pixel */
464   numRandPerChannel=0;
465   numRandomNumberPerPixel=0;
466   switch (noise_type)
467   {
468     case UniformNoise:
469     case ImpulseNoise:
470     case LaplacianNoise:
471     case RandomNoise:
472     default:
473       numRandPerChannel=1;
474       break;
475     case GaussianNoise:
476     case MultiplicativeGaussianNoise:
477     case PoissonNoise:
478       numRandPerChannel=2;
479       break;
480   };
481   if (GetPixelRedTraits(image) != UndefinedPixelTrait)
482     numRandomNumberPerPixel+=numRandPerChannel;
483   if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
484     numRandomNumberPerPixel+=numRandPerChannel;
485   if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
486     numRandomNumberPerPixel+=numRandPerChannel;
487   if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
488     numRandomNumberPerPixel+=numRandPerChannel;
489 
490   addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
491   if (addNoiseKernel == (cl_kernel) NULL)
492   {
493     (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
494       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
495     goto cleanup;
496   }
497 
498   /* 256 work items per group, 2 groups per CU */
499   workItemCount=device->max_compute_units*2*256;
500   inputPixelCount=(cl_int) (image->columns*image->rows);
501   pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
502   pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
503   lsize[0]=256;
504   gsize[0]=workItemCount;
505 
506   randomInfo=AcquireRandomInfo();
507   s=GetRandomInfoSeed(randomInfo);
508   seed0=s[0];
509   (void) GetPseudoRandomValue(randomInfo);
510   seed1=s[0];
511   randomInfo=DestroyRandomInfo(randomInfo);
512 
513   number_channels=(cl_uint) image->number_channels;
514   bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
515   attenuate=1.0f;
516   option=GetImageArtifact(image,"attenuate");
517   if (option != (char *) NULL)
518     attenuate=(float)StringToDouble(option,(char **) NULL);
519 
520   i=0;
521   status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
522   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
523   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
524   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
525   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
526   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
527   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&attenuate);
528   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
529   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
530   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
531   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
532   if (status != CL_SUCCESS)
533   {
534     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
535       ResourceLimitWarning,"clSetKernelArg failed.",".");
536     goto cleanup;
537   }
538 
539   outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
540     lsize,image,filteredImage,exception);
541 
542 cleanup:
543 
544   if (addNoiseKernel != (cl_kernel) NULL)
545     ReleaseOpenCLKernel(addNoiseKernel);
546   if (queue != (cl_command_queue) NULL)
547     ReleaseOpenCLCommandQueue(device,queue);
548   if (device != (MagickCLDevice) NULL)
549     ReleaseOpenCLDevice(device);
550   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
551     filteredImage=DestroyImage(filteredImage);
552 
553   return(filteredImage);
554 }
555 
AccelerateAddNoiseImage(const Image * image,const NoiseType noise_type,ExceptionInfo * exception)556 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
557   const NoiseType noise_type,ExceptionInfo *exception)
558 {
559   Image
560     *filteredImage;
561 
562   MagickCLEnv
563     clEnv;
564 
565   assert(image != NULL);
566   assert(exception != (ExceptionInfo *) NULL);
567 
568   if (checkAccelerateCondition(image) == MagickFalse)
569     return((Image *) NULL);
570 
571   clEnv=getOpenCLEnvironment(exception);
572   if (clEnv == (MagickCLEnv) NULL)
573     return((Image *) NULL);
574 
575   filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,exception);
576   return(filteredImage);
577 }
578 
579 /*
580 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
581 %                                                                             %
582 %                                                                             %
583 %                                                                             %
584 %     A c c e l e r a t e B l u r I m a g e                                   %
585 %                                                                             %
586 %                                                                             %
587 %                                                                             %
588 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
589 */
590 
ComputeBlurImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,ExceptionInfo * exception)591 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
592   const double radius,const double sigma,ExceptionInfo *exception)
593 {
594   cl_command_queue
595     queue;
596 
597   cl_int
598     status;
599 
600   cl_kernel
601     blurColumnKernel,
602     blurRowKernel;
603 
604   cl_mem
605     filteredImageBuffer,
606     imageBuffer,
607     imageKernelBuffer,
608     tempImageBuffer;
609 
610   cl_uint
611     imageColumns,
612     imageRows,
613     kernelWidth,
614     number_channels;
615 
616   Image
617     *filteredImage;
618 
619   MagickBooleanType
620     outputReady;
621 
622   MagickCLDevice
623     device;
624 
625   MagickSizeType
626     length;
627 
628   size_t
629     chunkSize=256,
630     gsize[2],
631     i,
632     lsize[2];
633 
634   filteredImage=NULL;
635   tempImageBuffer=NULL;
636   imageKernelBuffer=NULL;
637   blurRowKernel=NULL;
638   blurColumnKernel=NULL;
639   outputReady=MagickFalse;
640 
641   device=RequestOpenCLDevice(clEnv);
642   queue=AcquireOpenCLCommandQueue(device);
643   filteredImage=cloneImage(image,exception);
644   if (filteredImage == (Image *) NULL)
645     goto cleanup;
646   if (filteredImage->number_channels != image->number_channels)
647     goto cleanup;
648   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
649   if (imageBuffer == (cl_mem) NULL)
650     goto cleanup;
651   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
652   if (filteredImageBuffer == (cl_mem) NULL)
653     goto cleanup;
654 
655   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
656     exception);
657   if (imageKernelBuffer == (cl_mem) NULL)
658     goto cleanup;
659 
660   length=image->columns*image->rows;
661   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
662     sizeof(cl_float4),(void *) NULL);
663   if (tempImageBuffer == (cl_mem) NULL)
664     goto cleanup;
665 
666   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
667   if (blurRowKernel == (cl_kernel) NULL)
668   {
669     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
670       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
671     goto cleanup;
672   }
673 
674   number_channels=(cl_uint) image->number_channels;
675   imageColumns=(cl_uint) image->columns;
676   imageRows=(cl_uint) image->rows;
677 
678   i=0;
679   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
680   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
681   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
682   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
683   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
684   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
685   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
686   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
687   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
688   if (status != CL_SUCCESS)
689   {
690     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
691       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
692     goto cleanup;
693   }
694 
695   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
696   gsize[1]=image->rows;
697   lsize[0]=chunkSize;
698   lsize[1]=1;
699 
700   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
701     lsize,image,filteredImage,exception);
702   if (outputReady == MagickFalse)
703     goto cleanup;
704 
705   blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
706   if (blurColumnKernel == (cl_kernel) NULL)
707   {
708     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
709       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
710     goto cleanup;
711   }
712 
713   i=0;
714   status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
715   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
716   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
717   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
718   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
719   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
720   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
721   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
722   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
723   if (status != CL_SUCCESS)
724   {
725     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
726       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
727     goto cleanup;
728   }
729 
730   gsize[0]=image->columns;
731   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
732   lsize[0]=1;
733   lsize[1]=chunkSize;
734 
735   outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
736     lsize,image,filteredImage,exception);
737 
738 cleanup:
739 
740   if (tempImageBuffer != (cl_mem) NULL)
741     ReleaseOpenCLMemObject(tempImageBuffer);
742   if (imageKernelBuffer != (cl_mem) NULL)
743     ReleaseOpenCLMemObject(imageKernelBuffer);
744   if (blurRowKernel != (cl_kernel) NULL)
745     ReleaseOpenCLKernel(blurRowKernel);
746   if (blurColumnKernel != (cl_kernel) NULL)
747     ReleaseOpenCLKernel(blurColumnKernel);
748   if (queue != (cl_command_queue) NULL)
749     ReleaseOpenCLCommandQueue(device,queue);
750   if (device != (MagickCLDevice) NULL)
751     ReleaseOpenCLDevice(device);
752   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
753     filteredImage=DestroyImage(filteredImage);
754 
755   return(filteredImage);
756 }
757 
AccelerateBlurImage(const Image * image,const double radius,const double sigma,ExceptionInfo * exception)758 MagickPrivate Image* AccelerateBlurImage(const Image *image,
759   const double radius,const double sigma,ExceptionInfo *exception)
760 {
761   Image
762     *filteredImage;
763 
764   MagickCLEnv
765     clEnv;
766 
767   assert(image != NULL);
768   assert(exception != (ExceptionInfo *) NULL);
769 
770   if (checkAccelerateCondition(image) == MagickFalse)
771     return((Image *) NULL);
772 
773   clEnv=getOpenCLEnvironment(exception);
774   if (clEnv == (MagickCLEnv) NULL)
775     return((Image *) NULL);
776 
777   filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
778   return(filteredImage);
779 }
780 
781 /*
782 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
783 %                                                                             %
784 %                                                                             %
785 %                                                                             %
786 %     A c c e l e r a t e C o n t r a s t I m a g e                           %
787 %                                                                             %
788 %                                                                             %
789 %                                                                             %
790 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
791 */
792 
ComputeContrastImage(Image * image,MagickCLEnv clEnv,const MagickBooleanType sharpen,ExceptionInfo * exception)793 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
794   const MagickBooleanType sharpen,ExceptionInfo *exception)
795 {
796   CacheView
797     *image_view;
798 
799   cl_command_queue
800     queue;
801 
802   cl_int
803     clStatus;
804 
805   cl_kernel
806     filterKernel;
807 
808   cl_event
809     event;
810 
811   cl_mem
812     imageBuffer;
813 
814   cl_mem_flags
815     mem_flags;
816 
817   MagickBooleanType
818     outputReady;
819 
820   MagickCLDevice
821     device;
822 
823   MagickSizeType
824     length;
825 
826   size_t
827     global_work_size[2];
828 
829   unsigned int
830     i,
831     uSharpen;
832 
833   void
834     *inputPixels;
835 
836   outputReady = MagickFalse;
837   inputPixels = NULL;
838   imageBuffer = NULL;
839   filterKernel = NULL;
840   queue = NULL;
841 
842   device = RequestOpenCLDevice(clEnv);
843 
844   /* Create and initialize OpenCL buffers. */
845   image_view=AcquireAuthenticCacheView(image,exception);
846   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
847   if (inputPixels == (void *) NULL)
848   {
849     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
850     goto cleanup;
851   }
852 
853   /* If the host pointer is aligned to the size of CLPixelPacket,
854      then use the host buffer directly from the GPU; otherwise,
855      create a buffer on the GPU and copy the data over */
856   if (ALIGNED(inputPixels,CLPixelPacket))
857   {
858     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
859   }
860   else
861   {
862     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
863   }
864   /* create a CL buffer from image pixel buffer */
865   length = image->columns * image->rows;
866   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
867   if (clStatus != CL_SUCCESS)
868   {
869     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
870     goto cleanup;
871   }
872 
873   filterKernel = AcquireOpenCLKernel(device,"Contrast");
874   if (filterKernel == NULL)
875   {
876     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
877     goto cleanup;
878   }
879 
880   i = 0;
881   clStatus=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
882 
883   uSharpen = (sharpen == MagickFalse)?0:1;
884   clStatus|=clEnv->library->clSetKernelArg(filterKernel,i++,sizeof(cl_uint),&uSharpen);
885   if (clStatus != CL_SUCCESS)
886   {
887     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
888     goto cleanup;
889   }
890 
891   global_work_size[0] = image->columns;
892   global_work_size[1] = image->rows;
893   /* launch the kernel */
894   queue = AcquireOpenCLCommandQueue(device);
895   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, filterKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
896   if (clStatus != CL_SUCCESS)
897   {
898     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
899     goto cleanup;
900   }
901   RecordProfileData(device,filterKernel,event);
902 
903   if (ALIGNED(inputPixels,CLPixelPacket))
904   {
905     length = image->columns * image->rows;
906     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
907   }
908   else
909   {
910     length = image->columns * image->rows;
911     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
912   }
913   if (clStatus != CL_SUCCESS)
914   {
915     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
916     goto cleanup;
917   }
918   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
919 
920 cleanup:
921 
922   image_view=DestroyCacheView(image_view);
923 
924   if (imageBuffer!=NULL)
925     clEnv->library->clReleaseMemObject(imageBuffer);
926   if (filterKernel!=NULL)
927     ReleaseOpenCLKernel(filterKernel);
928   if (queue != NULL)
929     ReleaseOpenCLCommandQueue(device,queue);
930   if (device != NULL)
931     ReleaseOpenCLDevice(device);
932 
933   return(outputReady);
934 }
935 
AccelerateContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)936 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
937   const MagickBooleanType sharpen,ExceptionInfo *exception)
938 {
939   MagickBooleanType
940     status;
941 
942   MagickCLEnv
943     clEnv;
944 
945   assert(image != NULL);
946   assert(exception != (ExceptionInfo *) NULL);
947 
948   if (checkAccelerateConditionRGBA(image) == MagickFalse)
949     return(MagickFalse);
950 
951   clEnv=getOpenCLEnvironment(exception);
952   if (clEnv == (MagickCLEnv) NULL)
953     return(MagickFalse);
954 
955   status=ComputeContrastImage(image,clEnv,sharpen,exception);
956   return(status);
957 }
958 
959 /*
960 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
961 %                                                                             %
962 %                                                                             %
963 %                                                                             %
964 %     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
965 %                                                                             %
966 %                                                                             %
967 %                                                                             %
968 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
969 */
970 
ComputeContrastStretchImage(Image * image,MagickCLEnv clEnv,const double black_point,const double white_point,ExceptionInfo * exception)971 static MagickBooleanType ComputeContrastStretchImage(Image *image,
972   MagickCLEnv clEnv,const double black_point,const double white_point,
973   ExceptionInfo *exception)
974 {
975 #define ContrastStretchImageTag  "ContrastStretch/Image"
976 #define MaxRange(color)  ((MagickRealType) ScaleQuantumToMap((Quantum) (color)))
977 
978   CacheView
979     *image_view;
980 
981   cl_command_queue
982     queue;
983 
984   cl_int
985     clStatus;
986 
987   cl_mem_flags
988     mem_flags;
989 
990   cl_mem
991     histogramBuffer,
992     imageBuffer,
993     stretchMapBuffer;
994 
995   cl_kernel
996     histogramKernel,
997     stretchKernel;
998 
999   cl_event
1000     event;
1001 
1002   cl_uint4
1003     *histogram;
1004 
1005   double
1006     intensity;
1007 
1008   FloatPixelPacket
1009     black,
1010     white;
1011 
1012   MagickBooleanType
1013     outputReady,
1014     status;
1015 
1016   MagickCLDevice
1017     device;
1018 
1019   MagickSizeType
1020     length;
1021 
1022   PixelPacket
1023     *stretch_map;
1024 
1025   register ssize_t
1026     i;
1027 
1028   size_t
1029     global_work_size[2];
1030 
1031   void
1032     *hostPtr,
1033     *inputPixels;
1034 
1035   histogram=NULL;
1036   stretch_map=NULL;
1037   inputPixels = NULL;
1038   imageBuffer = NULL;
1039   histogramBuffer = NULL;
1040   stretchMapBuffer = NULL;
1041   histogramKernel = NULL;
1042   stretchKernel = NULL;
1043   queue = NULL;
1044   outputReady = MagickFalse;
1045 
1046 
1047   assert(image != (Image *) NULL);
1048   assert(image->signature == MagickCoreSignature);
1049   if (image->debug != MagickFalse)
1050     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1051 
1052   //exception=(&image->exception);
1053 
1054   /*
1055    * initialize opencl env
1056    */
1057   device = RequestOpenCLDevice(clEnv);
1058   queue = AcquireOpenCLCommandQueue(device);
1059 
1060   /*
1061     Allocate and initialize histogram arrays.
1062   */
1063   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1064 
1065   if (histogram == (cl_uint4 *) NULL)
1066     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1067 
1068   /* reset histogram */
1069   (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
1070 
1071   /*
1072   if (IsGrayImage(image,exception) != MagickFalse)
1073     (void) SetImageColorspace(image,GRAYColorspace);
1074   */
1075 
1076   status=MagickTrue;
1077 
1078 
1079   /*
1080     Form histogram.
1081   */
1082   /* Create and initialize OpenCL buffers. */
1083   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1084   /* assume this  will get a writable image */
1085   image_view=AcquireAuthenticCacheView(image,exception);
1086   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1087 
1088   if (inputPixels == (void *) NULL)
1089   {
1090     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1091     goto cleanup;
1092   }
1093   /* If the host pointer is aligned to the size of CLPixelPacket,
1094      then use the host buffer directly from the GPU; otherwise,
1095      create a buffer on the GPU and copy the data over */
1096   if (ALIGNED(inputPixels,CLPixelPacket))
1097   {
1098     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1099   }
1100   else
1101   {
1102     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1103   }
1104   /* create a CL buffer from image pixel buffer */
1105   length = image->columns * image->rows;
1106   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1107   if (clStatus != CL_SUCCESS)
1108   {
1109     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1110     goto cleanup;
1111   }
1112 
1113   /* If the host pointer is aligned to the size of cl_uint,
1114      then use the host buffer directly from the GPU; otherwise,
1115      create a buffer on the GPU and copy the data over */
1116   if (ALIGNED(histogram,cl_uint4))
1117   {
1118     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1119     hostPtr = histogram;
1120   }
1121   else
1122   {
1123     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1124     hostPtr = histogram;
1125   }
1126   /* create a CL buffer for histogram  */
1127   length = (MaxMap+1);
1128   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1129   if (clStatus != CL_SUCCESS)
1130   {
1131     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1132     goto cleanup;
1133   }
1134 
1135   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1136   if (status == MagickFalse)
1137     goto cleanup;
1138 
1139   /* read from the kenel output */
1140   if (ALIGNED(histogram,cl_uint4))
1141   {
1142     length = (MaxMap+1);
1143     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1144   }
1145   else
1146   {
1147     length = (MaxMap+1);
1148     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1149   }
1150   if (clStatus != CL_SUCCESS)
1151   {
1152     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1153     goto cleanup;
1154   }
1155 
1156   /* unmap, don't block gpu to use this buffer again.  */
1157   if (ALIGNED(histogram,cl_uint4))
1158   {
1159     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1160     if (clStatus != CL_SUCCESS)
1161     {
1162       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1163       goto cleanup;
1164     }
1165   }
1166 
1167   /* recreate input buffer later, in case image updated */
1168 #ifdef RECREATEBUFFER
1169   if (imageBuffer!=NULL)
1170     clEnv->library->clReleaseMemObject(imageBuffer);
1171 #endif
1172 
1173   /* CPU stuff */
1174   /*
1175      Find the histogram boundaries by locating the black/white levels.
1176   */
1177   black.red=0.0;
1178   white.red=MaxRange(QuantumRange);
1179   if ((image->channel_mask & RedChannel) != 0)
1180   {
1181     intensity=0.0;
1182     for (i=0; i <= (ssize_t) MaxMap; i++)
1183     {
1184       intensity+=histogram[i].s[2];
1185       if (intensity > black_point)
1186         break;
1187     }
1188     black.red=(MagickRealType) i;
1189     intensity=0.0;
1190     for (i=(ssize_t) MaxMap; i != 0; i--)
1191     {
1192       intensity+=histogram[i].s[2];
1193       if (intensity > ((double) image->columns*image->rows-white_point))
1194         break;
1195     }
1196     white.red=(MagickRealType) i;
1197   }
1198   black.green=0.0;
1199   white.green=MaxRange(QuantumRange);
1200   if ((image->channel_mask & GreenChannel) != 0)
1201   {
1202     intensity=0.0;
1203     for (i=0; i <= (ssize_t) MaxMap; i++)
1204     {
1205       intensity+=histogram[i].s[2];
1206       if (intensity > black_point)
1207         break;
1208     }
1209     black.green=(MagickRealType) i;
1210     intensity=0.0;
1211     for (i=(ssize_t) MaxMap; i != 0; i--)
1212     {
1213       intensity+=histogram[i].s[2];
1214       if (intensity > ((double) image->columns*image->rows-white_point))
1215         break;
1216     }
1217     white.green=(MagickRealType) i;
1218   }
1219   black.blue=0.0;
1220   white.blue=MaxRange(QuantumRange);
1221   if ((image->channel_mask & BlueChannel) != 0)
1222   {
1223     intensity=0.0;
1224     for (i=0; i <= (ssize_t) MaxMap; i++)
1225     {
1226       intensity+=histogram[i].s[2];
1227       if (intensity > black_point)
1228         break;
1229     }
1230     black.blue=(MagickRealType) i;
1231     intensity=0.0;
1232     for (i=(ssize_t) MaxMap; i != 0; i--)
1233     {
1234       intensity+=histogram[i].s[2];
1235       if (intensity > ((double) image->columns*image->rows-white_point))
1236         break;
1237     }
1238     white.blue=(MagickRealType) i;
1239   }
1240   black.alpha=0.0;
1241   white.alpha=MaxRange(QuantumRange);
1242   if ((image->channel_mask & AlphaChannel) != 0)
1243   {
1244     intensity=0.0;
1245     for (i=0; i <= (ssize_t) MaxMap; i++)
1246     {
1247       intensity+=histogram[i].s[2];
1248       if (intensity > black_point)
1249         break;
1250     }
1251     black.alpha=(MagickRealType) i;
1252     intensity=0.0;
1253     for (i=(ssize_t) MaxMap; i != 0; i--)
1254     {
1255       intensity+=histogram[i].s[2];
1256       if (intensity > ((double) image->columns*image->rows-white_point))
1257         break;
1258     }
1259     white.alpha=(MagickRealType) i;
1260   }
1261   /*
1262   black.index=0.0;
1263   white.index=MaxRange(QuantumRange);
1264   if (((channel & IndexChannel) != 0) && (image->colorspace == CMYKColorspace))
1265   {
1266     intensity=0.0;
1267     for (i=0; i <= (ssize_t) MaxMap; i++)
1268     {
1269       intensity+=histogram[i].index;
1270       if (intensity > black_point)
1271         break;
1272     }
1273     black.index=(MagickRealType) i;
1274     intensity=0.0;
1275     for (i=(ssize_t) MaxMap; i != 0; i--)
1276     {
1277       intensity+=histogram[i].index;
1278       if (intensity > ((double) image->columns*image->rows-white_point))
1279         break;
1280     }
1281     white.index=(MagickRealType) i;
1282   }
1283   */
1284 
1285 
1286   stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1287     sizeof(*stretch_map));
1288 
1289   if (stretch_map == (PixelPacket *) NULL)
1290     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1291       image->filename);
1292 
1293   /*
1294     Stretch the histogram to create the stretched image mapping.
1295   */
1296   (void) ResetMagickMemory(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1297   for (i=0; i <= (ssize_t) MaxMap; i++)
1298   {
1299     if ((image->channel_mask & RedChannel) != 0)
1300     {
1301       if (i < (ssize_t) black.red)
1302         stretch_map[i].red=(Quantum) 0;
1303       else
1304         if (i > (ssize_t) white.red)
1305           stretch_map[i].red=QuantumRange;
1306         else
1307           if (black.red != white.red)
1308             stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1309                   (i-black.red)/(white.red-black.red)));
1310     }
1311     if ((image->channel_mask & GreenChannel) != 0)
1312     {
1313       if (i < (ssize_t) black.green)
1314         stretch_map[i].green=0;
1315       else
1316         if (i > (ssize_t) white.green)
1317           stretch_map[i].green=QuantumRange;
1318         else
1319           if (black.green != white.green)
1320             stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1321                   (i-black.green)/(white.green-black.green)));
1322     }
1323     if ((image->channel_mask & BlueChannel) != 0)
1324     {
1325       if (i < (ssize_t) black.blue)
1326         stretch_map[i].blue=0;
1327       else
1328         if (i > (ssize_t) white.blue)
1329           stretch_map[i].blue= QuantumRange;
1330         else
1331           if (black.blue != white.blue)
1332             stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1333                   (i-black.blue)/(white.blue-black.blue)));
1334     }
1335     if ((image->channel_mask & AlphaChannel) != 0)
1336     {
1337       if (i < (ssize_t) black.alpha)
1338         stretch_map[i].alpha=0;
1339       else
1340         if (i > (ssize_t) white.alpha)
1341           stretch_map[i].alpha=QuantumRange;
1342         else
1343           if (black.alpha != white.alpha)
1344             stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1345                   (i-black.alpha)/(white.alpha-black.alpha)));
1346     }
1347     /*
1348     if (((channel & IndexChannel) != 0) &&
1349         (image->colorspace == CMYKColorspace))
1350     {
1351       if (i < (ssize_t) black.index)
1352         stretch_map[i].index=0;
1353       else
1354         if (i > (ssize_t) white.index)
1355           stretch_map[i].index=QuantumRange;
1356         else
1357           if (black.index != white.index)
1358             stretch_map[i].index=ScaleMapToQuantum((MagickRealType) (MaxMap*
1359                   (i-black.index)/(white.index-black.index)));
1360     }
1361     */
1362   }
1363 
1364   /*
1365     Stretch the image.
1366   */
1367   if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1368       (image->colorspace == CMYKColorspace)))
1369     image->storage_class=DirectClass;
1370   if (image->storage_class == PseudoClass)
1371   {
1372     /*
1373        Stretch colormap.
1374        */
1375     for (i=0; i < (ssize_t) image->colors; i++)
1376     {
1377       if ((image->channel_mask & RedChannel) != 0)
1378       {
1379         if (black.red != white.red)
1380           image->colormap[i].red=stretch_map[
1381             ScaleQuantumToMap(image->colormap[i].red)].red;
1382       }
1383       if ((image->channel_mask & GreenChannel) != 0)
1384       {
1385         if (black.green != white.green)
1386           image->colormap[i].green=stretch_map[
1387             ScaleQuantumToMap(image->colormap[i].green)].green;
1388       }
1389       if ((image->channel_mask & BlueChannel) != 0)
1390       {
1391         if (black.blue != white.blue)
1392           image->colormap[i].blue=stretch_map[
1393             ScaleQuantumToMap(image->colormap[i].blue)].blue;
1394       }
1395       if ((image->channel_mask & AlphaChannel) != 0)
1396       {
1397         if (black.alpha != white.alpha)
1398           image->colormap[i].alpha=stretch_map[
1399             ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1400       }
1401     }
1402   }
1403 
1404   /*
1405     Stretch image.
1406   */
1407 
1408 
1409   /* GPU can work on this again, image and equalize map as input
1410     image:        uchar4 (CLPixelPacket)
1411     stretch_map:  uchar4 (PixelPacket)
1412     black, white: float4 (FloatPixelPacket) */
1413 
1414 #ifdef RECREATEBUFFER
1415   /* If the host pointer is aligned to the size of CLPixelPacket,
1416      then use the host buffer directly from the GPU; otherwise,
1417      create a buffer on the GPU and copy the data over */
1418   if (ALIGNED(inputPixels,CLPixelPacket))
1419   {
1420     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1421   }
1422   else
1423   {
1424     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1425   }
1426   /* create a CL buffer from image pixel buffer */
1427   length = image->columns * image->rows;
1428   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1429   if (clStatus != CL_SUCCESS)
1430   {
1431     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1432     goto cleanup;
1433   }
1434 #endif
1435 
1436   /* Create and initialize OpenCL buffers. */
1437   if (ALIGNED(stretch_map, PixelPacket))
1438   {
1439     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1440     hostPtr = stretch_map;
1441   }
1442   else
1443   {
1444     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1445     hostPtr = stretch_map;
1446   }
1447   /* create a CL buffer for stretch_map  */
1448   length = (MaxMap+1);
1449   stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1450   if (clStatus != CL_SUCCESS)
1451   {
1452     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1453     goto cleanup;
1454   }
1455 
1456   /* get the OpenCL kernel */
1457   stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1458   if (stretchKernel == NULL)
1459   {
1460     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1461     goto cleanup;
1462   }
1463 
1464   /* set the kernel arguments */
1465   i = 0;
1466   clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1467   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1468   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1469   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&white);
1470   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(FloatPixelPacket),&black);
1471   if (clStatus != CL_SUCCESS)
1472   {
1473     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1474     goto cleanup;
1475   }
1476 
1477   /* launch the kernel */
1478   global_work_size[0] = image->columns;
1479   global_work_size[1] = image->rows;
1480 
1481   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1482 
1483   if (clStatus != CL_SUCCESS)
1484   {
1485     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1486     goto cleanup;
1487   }
1488   RecordProfileData(device,stretchKernel,event);
1489 
1490   /* read the data back */
1491   if (ALIGNED(inputPixels,CLPixelPacket))
1492   {
1493     length = image->columns * image->rows;
1494     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1495   }
1496   else
1497   {
1498     length = image->columns * image->rows;
1499     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1500   }
1501   if (clStatus != CL_SUCCESS)
1502   {
1503     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1504     goto cleanup;
1505   }
1506 
1507   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1508 
1509 cleanup:
1510 
1511   image_view=DestroyCacheView(image_view);
1512 
1513   if (imageBuffer!=NULL)
1514     clEnv->library->clReleaseMemObject(imageBuffer);
1515 
1516   if (stretchMapBuffer!=NULL)
1517     clEnv->library->clReleaseMemObject(stretchMapBuffer);
1518   if (stretch_map!=NULL)
1519     stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1520   if (histogramBuffer!=NULL)
1521     clEnv->library->clReleaseMemObject(histogramBuffer);
1522   if (histogram!=NULL)
1523     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1524   if (histogramKernel!=NULL)
1525     ReleaseOpenCLKernel(histogramKernel);
1526   if (stretchKernel!=NULL)
1527     ReleaseOpenCLKernel(stretchKernel);
1528   if (queue != NULL)
1529     ReleaseOpenCLCommandQueue(device,queue);
1530   if (device != NULL)
1531     ReleaseOpenCLDevice(device);
1532 
1533   return(outputReady);
1534 }
1535 
AccelerateContrastStretchImage(Image * image,const double black_point,const double white_point,ExceptionInfo * exception)1536 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1537   Image *image,const double black_point,const double white_point,
1538   ExceptionInfo *exception)
1539 {
1540   MagickBooleanType
1541     status;
1542 
1543   MagickCLEnv
1544     clEnv;
1545 
1546   assert(image != NULL);
1547   assert(exception != (ExceptionInfo *) NULL);
1548 
1549   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1550       (checkHistogramCondition(image,image->intensity) == MagickFalse))
1551     return(MagickFalse);
1552 
1553   clEnv=getOpenCLEnvironment(exception);
1554   if (clEnv == (MagickCLEnv) NULL)
1555     return(MagickFalse);
1556 
1557   status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1558     exception);
1559   return(status);
1560 }
1561 
1562 /*
1563 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1564 %                                                                             %
1565 %                                                                             %
1566 %                                                                             %
1567 %     A c c e l e r a t e C o n v o l v e I m a g e                           %
1568 %                                                                             %
1569 %                                                                             %
1570 %                                                                             %
1571 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1572 */
1573 
ComputeConvolveImage(const Image * image,MagickCLEnv clEnv,const KernelInfo * kernel,ExceptionInfo * exception)1574 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1575   const KernelInfo *kernel,ExceptionInfo *exception)
1576 {
1577   CacheView
1578     *filteredImage_view,
1579     *image_view;
1580 
1581   cl_command_queue
1582     queue;
1583 
1584   cl_event
1585     event;
1586 
1587   cl_kernel
1588     clkernel;
1589 
1590   cl_int
1591     clStatus;
1592 
1593   cl_mem
1594     convolutionKernel,
1595     filteredImageBuffer,
1596     imageBuffer;
1597 
1598   cl_mem_flags
1599     mem_flags;
1600 
1601   const void
1602     *inputPixels;
1603 
1604   float
1605     *kernelBufferPtr;
1606 
1607   Image
1608     *filteredImage;
1609 
1610   MagickBooleanType
1611     outputReady;
1612 
1613   MagickCLDevice
1614     device;
1615 
1616   MagickSizeType
1617     length;
1618 
1619   size_t
1620     global_work_size[3],
1621     localGroupSize[3],
1622     localMemoryRequirement;
1623 
1624   unsigned
1625     kernelSize;
1626 
1627   unsigned int
1628     filterHeight,
1629     filterWidth,
1630     i,
1631     imageHeight,
1632     imageWidth,
1633     matte;
1634 
1635   void
1636     *filteredPixels,
1637     *hostPtr;
1638 
1639   /* intialize all CL objects to NULL */
1640   imageBuffer = NULL;
1641   filteredImageBuffer = NULL;
1642   convolutionKernel = NULL;
1643   clkernel = NULL;
1644   queue = NULL;
1645 
1646   filteredImage = NULL;
1647   filteredImage_view = NULL;
1648   outputReady = MagickFalse;
1649 
1650   device = RequestOpenCLDevice(clEnv);
1651 
1652   image_view=AcquireAuthenticCacheView(image,exception);
1653   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1654   if (inputPixels == (const void *) NULL)
1655   {
1656     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1657     goto cleanup;
1658   }
1659 
1660   /* Create and initialize OpenCL buffers. */
1661 
1662   /* If the host pointer is aligned to the size of CLPixelPacket,
1663      then use the host buffer directly from the GPU; otherwise,
1664      create a buffer on the GPU and copy the data over */
1665   if (ALIGNED(inputPixels,CLPixelPacket))
1666   {
1667     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1668   }
1669   else
1670   {
1671     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1672   }
1673   /* create a CL buffer from image pixel buffer */
1674   length = image->columns * image->rows;
1675   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1676   if (clStatus != CL_SUCCESS)
1677   {
1678     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1679     goto cleanup;
1680   }
1681 
1682   filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
1683   assert(filteredImage != NULL);
1684   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1685   {
1686     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1687     goto cleanup;
1688   }
1689   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1690   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1691   if (filteredPixels == (void *) NULL)
1692   {
1693     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1694     goto cleanup;
1695   }
1696 
1697   if (ALIGNED(filteredPixels,CLPixelPacket))
1698   {
1699     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1700     hostPtr = filteredPixels;
1701   }
1702   else
1703   {
1704     mem_flags = CL_MEM_WRITE_ONLY;
1705     hostPtr = NULL;
1706   }
1707   /* create a CL buffer from image pixel buffer */
1708   length = image->columns * image->rows;
1709   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1710   if (clStatus != CL_SUCCESS)
1711   {
1712     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1713     goto cleanup;
1714   }
1715 
1716   kernelSize = (unsigned int) (kernel->width * kernel->height);
1717   convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1718   if (clStatus != CL_SUCCESS)
1719   {
1720     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1721     goto cleanup;
1722   }
1723 
1724   queue = AcquireOpenCLCommandQueue(device);
1725 
1726   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1727           , 0, NULL, NULL, &clStatus);
1728   if (clStatus != CL_SUCCESS)
1729   {
1730     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1731     goto cleanup;
1732   }
1733   for (i = 0; i < kernelSize; i++)
1734   {
1735     kernelBufferPtr[i] = (float) kernel->values[i];
1736   }
1737   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1738   if (clStatus != CL_SUCCESS)
1739   {
1740     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1741     goto cleanup;
1742   }
1743 
1744   /* Compute the local memory requirement for a 16x16 workgroup.
1745      If it's larger than 16k, reduce the workgroup size to 8x8 */
1746   localGroupSize[0] = 16;
1747   localGroupSize[1] = 16;
1748   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1749     + kernel->width*kernel->height*sizeof(float);
1750 
1751   if (localMemoryRequirement > device->local_memory_size)
1752   {
1753     localGroupSize[0] = 8;
1754     localGroupSize[1] = 8;
1755     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1756       + kernel->width*kernel->height*sizeof(float);
1757   }
1758   if (localMemoryRequirement <= device->local_memory_size)
1759   {
1760     /* get the OpenCL kernel */
1761     clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1762     if (clkernel == NULL)
1763     {
1764       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1765       goto cleanup;
1766     }
1767 
1768     /* set the kernel arguments */
1769     i = 0;
1770     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1771     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1772     imageWidth = (unsigned int) image->columns;
1773     imageHeight = (unsigned int) image->rows;
1774     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1775     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1776     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1777     filterWidth = (unsigned int) kernel->width;
1778     filterHeight = (unsigned int) kernel->height;
1779     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1780     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1781     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1782     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1783     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1784     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1785     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1786     if (clStatus != CL_SUCCESS)
1787     {
1788       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1789       goto cleanup;
1790     }
1791 
1792     /* pad the global size to a multiple of the local work size dimension */
1793     global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1794     global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1795 
1796     /* launch the kernel */
1797     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1798     if (clStatus != CL_SUCCESS)
1799     {
1800       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1801       goto cleanup;
1802     }
1803     RecordProfileData(device,clkernel,event);
1804   }
1805   else
1806   {
1807     /* get the OpenCL kernel */
1808     clkernel = AcquireOpenCLKernel(device,"Convolve");
1809     if (clkernel == NULL)
1810     {
1811       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1812       goto cleanup;
1813     }
1814 
1815     /* set the kernel arguments */
1816     i = 0;
1817     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1818     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1819     imageWidth = (unsigned int) image->columns;
1820     imageHeight = (unsigned int) image->rows;
1821     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1822     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1823     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1824     filterWidth = (unsigned int) kernel->width;
1825     filterHeight = (unsigned int) kernel->height;
1826     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1827     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1828     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1829     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1830     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1831     if (clStatus != CL_SUCCESS)
1832     {
1833       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1834       goto cleanup;
1835     }
1836 
1837     localGroupSize[0] = 8;
1838     localGroupSize[1] = 8;
1839     global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1840     global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1841 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1842 
1843     if (clStatus != CL_SUCCESS)
1844     {
1845       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1846       goto cleanup;
1847     }
1848   }
1849   RecordProfileData(device,clkernel,event);
1850 
1851   if (ALIGNED(filteredPixels,CLPixelPacket))
1852   {
1853     length = image->columns * image->rows;
1854     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1855   }
1856   else
1857   {
1858     length = image->columns * image->rows;
1859     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1860   }
1861   if (clStatus != CL_SUCCESS)
1862   {
1863     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1864     goto cleanup;
1865   }
1866 
1867   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1868 
1869 cleanup:
1870 
1871   image_view=DestroyCacheView(image_view);
1872   if (filteredImage_view != NULL)
1873     filteredImage_view=DestroyCacheView(filteredImage_view);
1874   if (imageBuffer != NULL)
1875     clEnv->library->clReleaseMemObject(imageBuffer);
1876   if (filteredImageBuffer != NULL)
1877     clEnv->library->clReleaseMemObject(filteredImageBuffer);
1878   if (convolutionKernel != NULL)
1879     clEnv->library->clReleaseMemObject(convolutionKernel);
1880   if (clkernel != NULL)
1881     ReleaseOpenCLKernel(clkernel);
1882   if (queue != NULL)
1883     ReleaseOpenCLCommandQueue(device,queue);
1884   if (device != NULL)
1885     ReleaseOpenCLDevice(device);
1886   if (outputReady == MagickFalse)
1887   {
1888     if (filteredImage != NULL)
1889     {
1890       DestroyImage(filteredImage);
1891       filteredImage = NULL;
1892     }
1893   }
1894 
1895   return(filteredImage);
1896 }
1897 
AccelerateConvolveImage(const Image * image,const KernelInfo * kernel,ExceptionInfo * exception)1898 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1899   const KernelInfo *kernel,ExceptionInfo *exception)
1900 {
1901   /* Temporary disabled due to access violation
1902 
1903   Image
1904     *filteredImage;
1905 
1906   assert(image != NULL);
1907   assert(kernel != (KernelInfo *) NULL);
1908   assert(exception != (ExceptionInfo *) NULL);
1909   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1910       (checkOpenCLEnvironment(exception) == MagickFalse))
1911     return((Image *) NULL);
1912 
1913   filteredImage=ComputeConvolveImage(image,kernel,exception);
1914   return(filteredImage);
1915   */
1916   magick_unreferenced(image);
1917   magick_unreferenced(kernel);
1918   magick_unreferenced(exception);
1919   return((Image *)NULL);
1920 }
1921 
1922 /*
1923 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1924 %                                                                             %
1925 %                                                                             %
1926 %                                                                             %
1927 %     A c c e l e r a t e D e s p e c k l e I m a g e                         %
1928 %                                                                             %
1929 %                                                                             %
1930 %                                                                             %
1931 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1932 */
1933 
ComputeDespeckleImage(const Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)1934 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1935   ExceptionInfo*exception)
1936 {
1937   static const int
1938     X[4] = {0, 1, 1,-1},
1939     Y[4] = {1, 0, 1, 1};
1940 
1941   CacheView
1942     *filteredImage_view,
1943     *image_view;
1944 
1945   cl_command_queue
1946     queue;
1947 
1948   cl_int
1949     clStatus;
1950 
1951   cl_kernel
1952     hullPass1,
1953     hullPass2;
1954 
1955   cl_event
1956     event;
1957 
1958   cl_mem_flags
1959     mem_flags;
1960 
1961   cl_mem
1962     filteredImageBuffer,
1963     imageBuffer,
1964     tempImageBuffer[2];
1965 
1966   const void
1967     *inputPixels;
1968 
1969   Image
1970     *filteredImage;
1971 
1972   int
1973     k,
1974     matte;
1975 
1976   MagickBooleanType
1977     outputReady;
1978 
1979   MagickCLDevice
1980     device;
1981 
1982   MagickSizeType
1983     length;
1984 
1985   size_t
1986     global_work_size[2];
1987 
1988   unsigned int
1989     imageHeight,
1990     imageWidth;
1991 
1992   void
1993     *filteredPixels,
1994     *hostPtr;
1995 
1996   outputReady = MagickFalse;
1997   inputPixels = NULL;
1998   filteredImage = NULL;
1999   filteredImage_view = NULL;
2000   filteredPixels = NULL;
2001   imageBuffer = NULL;
2002   filteredImageBuffer = NULL;
2003   hullPass1 = NULL;
2004   hullPass2 = NULL;
2005   queue = NULL;
2006   tempImageBuffer[0] = tempImageBuffer[1] = NULL;
2007 
2008   device = RequestOpenCLDevice(clEnv);
2009   queue = AcquireOpenCLCommandQueue(device);
2010 
2011   image_view=AcquireAuthenticCacheView(image,exception);
2012   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2013   if (inputPixels == (void *) NULL)
2014   {
2015     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2016     goto cleanup;
2017   }
2018 
2019   if (ALIGNED(inputPixels,CLPixelPacket))
2020   {
2021     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2022   }
2023   else
2024   {
2025     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2026   }
2027   /* create a CL buffer from image pixel buffer */
2028   length = image->columns * image->rows;
2029   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2030   if (clStatus != CL_SUCCESS)
2031   {
2032     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2033     goto cleanup;
2034   }
2035 
2036   mem_flags = CL_MEM_READ_WRITE;
2037   length = image->columns * image->rows;
2038   for (k = 0; k < 2; k++)
2039   {
2040     tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
2041     if (clStatus != CL_SUCCESS)
2042     {
2043       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2044       goto cleanup;
2045     }
2046   }
2047 
2048   filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
2049   assert(filteredImage != NULL);
2050   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
2051   {
2052     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
2053     goto cleanup;
2054   }
2055   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
2056   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
2057   if (filteredPixels == (void *) NULL)
2058   {
2059     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
2060     goto cleanup;
2061   }
2062 
2063   if (ALIGNED(filteredPixels,CLPixelPacket))
2064   {
2065     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
2066     hostPtr = filteredPixels;
2067   }
2068   else
2069   {
2070     mem_flags = CL_MEM_WRITE_ONLY;
2071     hostPtr = NULL;
2072   }
2073   /* create a CL buffer from image pixel buffer */
2074   length = image->columns * image->rows;
2075   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
2076   if (clStatus != CL_SUCCESS)
2077   {
2078     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2079     goto cleanup;
2080   }
2081 
2082   hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2083   hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2084 
2085   clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2086   clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2087   imageWidth = (unsigned int) image->columns;
2088   clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2089   imageHeight = (unsigned int) image->rows;
2090   clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2091   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2092   clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2093   if (clStatus != CL_SUCCESS)
2094   {
2095     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2096     goto cleanup;
2097   }
2098 
2099   clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2100   clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2101   imageWidth = (unsigned int) image->columns;
2102   clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2103   imageHeight = (unsigned int) image->rows;
2104   clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2105   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2106   clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2107   if (clStatus != CL_SUCCESS)
2108   {
2109     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2110     goto cleanup;
2111   }
2112 
2113 
2114   global_work_size[0] = image->columns;
2115   global_work_size[1] = image->rows;
2116 
2117 
2118   for (k = 0; k < 4; k++)
2119   {
2120     cl_int2 offset;
2121     int polarity;
2122 
2123 
2124     offset.s[0] = X[k];
2125     offset.s[1] = Y[k];
2126     polarity = 1;
2127     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2128     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2129     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2130     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2131     if (clStatus != CL_SUCCESS)
2132     {
2133       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2134       goto cleanup;
2135     }
2136     /* launch the kernel */
2137 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2138     if (clStatus != CL_SUCCESS)
2139     {
2140       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2141       goto cleanup;
2142     }
2143     RecordProfileData(device,hullPass1,event);
2144 
2145     /* launch the kernel */
2146 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2147     if (clStatus != CL_SUCCESS)
2148     {
2149       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2150       goto cleanup;
2151     }
2152     RecordProfileData(device,hullPass2,event);
2153 
2154     if (k == 0)
2155       clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2156     offset.s[0] = -X[k];
2157     offset.s[1] = -Y[k];
2158     polarity = 1;
2159     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2160     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2161     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2162     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2163     if (clStatus != CL_SUCCESS)
2164     {
2165       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2166       goto cleanup;
2167     }
2168     /* launch the kernel */
2169 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2170     if (clStatus != CL_SUCCESS)
2171     {
2172       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2173       goto cleanup;
2174     }
2175     RecordProfileData(device,hullPass1,event);
2176 
2177     /* launch the kernel */
2178 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2179     if (clStatus != CL_SUCCESS)
2180     {
2181       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2182       goto cleanup;
2183     }
2184     RecordProfileData(device,hullPass2,event);
2185 
2186     offset.s[0] = -X[k];
2187     offset.s[1] = -Y[k];
2188     polarity = -1;
2189     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2190     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2191     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2192     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2193     if (clStatus != CL_SUCCESS)
2194     {
2195       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2196       goto cleanup;
2197     }
2198     /* launch the kernel */
2199 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2200     if (clStatus != CL_SUCCESS)
2201     {
2202       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2203       goto cleanup;
2204     }
2205     RecordProfileData(device,hullPass1,event);
2206 
2207     /* launch the kernel */
2208 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2209     if (clStatus != CL_SUCCESS)
2210     {
2211       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2212       goto cleanup;
2213     }
2214     RecordProfileData(device,hullPass2,event);
2215 
2216     offset.s[0] = X[k];
2217     offset.s[1] = Y[k];
2218     polarity = -1;
2219     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2220     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2221     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2222     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2223 
2224     if (k == 3)
2225       clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2226 
2227     if (clStatus != CL_SUCCESS)
2228     {
2229       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2230       goto cleanup;
2231     }
2232     /* launch the kernel */
2233 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2234     if (clStatus != CL_SUCCESS)
2235     {
2236       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2237       goto cleanup;
2238     }
2239     RecordProfileData(device,hullPass1,event);
2240 
2241     /* launch the kernel */
2242 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2243     if (clStatus != CL_SUCCESS)
2244     {
2245       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2246       goto cleanup;
2247     }
2248     RecordProfileData(device,hullPass2,event);
2249   }
2250 
2251   if (ALIGNED(filteredPixels,CLPixelPacket))
2252   {
2253     length = image->columns * image->rows;
2254     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2255   }
2256   else
2257   {
2258     length = image->columns * image->rows;
2259     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2260   }
2261   if (clStatus != CL_SUCCESS)
2262   {
2263     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2264     goto cleanup;
2265   }
2266 
2267   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2268 
2269 cleanup:
2270 
2271   image_view=DestroyCacheView(image_view);
2272   if (filteredImage_view != NULL)
2273     filteredImage_view=DestroyCacheView(filteredImage_view);
2274 
2275   if (queue != NULL)
2276     ReleaseOpenCLCommandQueue(device,queue);
2277   if (device != NULL)
2278     ReleaseOpenCLDevice(device);
2279   if (imageBuffer!=NULL)
2280     clEnv->library->clReleaseMemObject(imageBuffer);
2281   for (k = 0; k < 2; k++)
2282   {
2283     if (tempImageBuffer[k]!=NULL)
2284       clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2285   }
2286   if (filteredImageBuffer!=NULL)
2287     clEnv->library->clReleaseMemObject(filteredImageBuffer);
2288   if (hullPass1!=NULL)
2289     ReleaseOpenCLKernel(hullPass1);
2290   if (hullPass2!=NULL)
2291     ReleaseOpenCLKernel(hullPass2);
2292   if (outputReady == MagickFalse && filteredImage != NULL)
2293     filteredImage=DestroyImage(filteredImage);
2294 
2295   return(filteredImage);
2296 }
2297 
AccelerateDespeckleImage(const Image * image,ExceptionInfo * exception)2298 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2299   ExceptionInfo* exception)
2300 {
2301   Image
2302     *filteredImage;
2303 
2304   MagickCLEnv
2305     clEnv;
2306 
2307   assert(image != NULL);
2308   assert(exception != (ExceptionInfo *) NULL);
2309 
2310   if (checkAccelerateConditionRGBA(image) == MagickFalse)
2311     return((Image *) NULL);
2312 
2313   clEnv=getOpenCLEnvironment(exception);
2314   if (clEnv == (MagickCLEnv) NULL)
2315     return((Image *) NULL);
2316 
2317   filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2318   return(filteredImage);
2319 }
2320 
2321 /*
2322 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2323 %                                                                             %
2324 %                                                                             %
2325 %                                                                             %
2326 %     A c c e l e r a t e E q u a l i z e I m a g e                           %
2327 %                                                                             %
2328 %                                                                             %
2329 %                                                                             %
2330 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2331 */
2332 
ComputeEqualizeImage(Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)2333 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2334   ExceptionInfo *exception)
2335 {
2336 #define EqualizeImageTag  "Equalize/Image"
2337 
2338   CacheView
2339     *image_view;
2340 
2341   cl_command_queue
2342     queue;
2343 
2344   cl_int
2345     clStatus;
2346 
2347   cl_mem_flags
2348     mem_flags;
2349 
2350   cl_mem
2351     equalizeMapBuffer,
2352     histogramBuffer,
2353     imageBuffer;
2354 
2355   cl_kernel
2356     equalizeKernel,
2357     histogramKernel;
2358 
2359   cl_event
2360     event;
2361 
2362   cl_uint4
2363     *histogram;
2364 
2365   FloatPixelPacket
2366     white,
2367     black,
2368     intensity,
2369     *map;
2370 
2371   MagickBooleanType
2372     outputReady,
2373     status;
2374 
2375   MagickCLDevice
2376     device;
2377 
2378   MagickSizeType
2379     length;
2380 
2381   PixelPacket
2382     *equalize_map;
2383 
2384   register ssize_t
2385     i;
2386 
2387   size_t
2388     global_work_size[2];
2389 
2390   void
2391     *hostPtr,
2392     *inputPixels;
2393 
2394   map=NULL;
2395   histogram=NULL;
2396   equalize_map=NULL;
2397   inputPixels = NULL;
2398   imageBuffer = NULL;
2399   histogramBuffer = NULL;
2400   equalizeMapBuffer = NULL;
2401   histogramKernel = NULL;
2402   equalizeKernel = NULL;
2403   queue = NULL;
2404   outputReady = MagickFalse;
2405 
2406   assert(image != (Image *) NULL);
2407   assert(image->signature == MagickCoreSignature);
2408   if (image->debug != MagickFalse)
2409     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2410 
2411   /*
2412    * initialize opencl env
2413    */
2414   device = RequestOpenCLDevice(clEnv);
2415   queue = AcquireOpenCLCommandQueue(device);
2416 
2417   /*
2418     Allocate and initialize histogram arrays.
2419   */
2420   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2421   if (histogram == (cl_uint4 *) NULL)
2422       ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2423 
2424   /* reset histogram */
2425   (void) ResetMagickMemory(histogram,0,(MaxMap+1)*sizeof(*histogram));
2426 
2427   /* Create and initialize OpenCL buffers. */
2428   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2429   /* assume this  will get a writable image */
2430   image_view=AcquireAuthenticCacheView(image,exception);
2431   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2432 
2433   if (inputPixels == (void *) NULL)
2434   {
2435     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2436     goto cleanup;
2437   }
2438   /* If the host pointer is aligned to the size of CLPixelPacket,
2439      then use the host buffer directly from the GPU; otherwise,
2440      create a buffer on the GPU and copy the data over */
2441   if (ALIGNED(inputPixels,CLPixelPacket))
2442   {
2443     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2444   }
2445   else
2446   {
2447     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2448   }
2449   /* create a CL buffer from image pixel buffer */
2450   length = image->columns * image->rows;
2451   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2452   if (clStatus != CL_SUCCESS)
2453   {
2454     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2455     goto cleanup;
2456   }
2457 
2458   /* If the host pointer is aligned to the size of cl_uint,
2459      then use the host buffer directly from the GPU; otherwise,
2460      create a buffer on the GPU and copy the data over */
2461   if (ALIGNED(histogram,cl_uint4))
2462   {
2463     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2464     hostPtr = histogram;
2465   }
2466   else
2467   {
2468     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2469     hostPtr = histogram;
2470   }
2471   /* create a CL buffer for histogram  */
2472   length = (MaxMap+1);
2473   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2474   if (clStatus != CL_SUCCESS)
2475   {
2476     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2477     goto cleanup;
2478   }
2479 
2480   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2481   if (status == MagickFalse)
2482     goto cleanup;
2483 
2484   /* read from the kenel output */
2485   if (ALIGNED(histogram,cl_uint4))
2486   {
2487     length = (MaxMap+1);
2488     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2489   }
2490   else
2491   {
2492     length = (MaxMap+1);
2493     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2494   }
2495   if (clStatus != CL_SUCCESS)
2496   {
2497     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2498     goto cleanup;
2499   }
2500 
2501   /* unmap, don't block gpu to use this buffer again.  */
2502   if (ALIGNED(histogram,cl_uint4))
2503   {
2504     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2505     if (clStatus != CL_SUCCESS)
2506     {
2507       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2508       goto cleanup;
2509     }
2510   }
2511 
2512   /* recreate input buffer later, in case image updated */
2513 #ifdef RECREATEBUFFER
2514   if (imageBuffer!=NULL)
2515     clEnv->library->clReleaseMemObject(imageBuffer);
2516 #endif
2517 
2518   /* CPU stuff */
2519   equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2520   if (equalize_map == (PixelPacket *) NULL)
2521     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2522 
2523   map=(FloatPixelPacket *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2524   if (map == (FloatPixelPacket *) NULL)
2525     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2526 
2527   /*
2528     Integrate the histogram to get the equalization map.
2529   */
2530   (void) ResetMagickMemory(&intensity,0,sizeof(intensity));
2531   for (i=0; i <= (ssize_t) MaxMap; i++)
2532   {
2533     if ((image->channel_mask & SyncChannels) != 0)
2534     {
2535       intensity.red+=histogram[i].s[2];
2536       map[i]=intensity;
2537       continue;
2538     }
2539     if ((image->channel_mask & RedChannel) != 0)
2540       intensity.red+=histogram[i].s[2];
2541     if ((image->channel_mask & GreenChannel) != 0)
2542       intensity.green+=histogram[i].s[1];
2543     if ((image->channel_mask & BlueChannel) != 0)
2544       intensity.blue+=histogram[i].s[0];
2545     if ((image->channel_mask & AlphaChannel) != 0)
2546       intensity.alpha+=histogram[i].s[3];
2547     /*
2548     if (((channel & IndexChannel) != 0) &&
2549         (image->colorspace == CMYKColorspace))
2550     {
2551       intensity.index+=histogram[i].index;
2552     }
2553     */
2554     map[i]=intensity;
2555   }
2556   black=map[0];
2557   white=map[(int) MaxMap];
2558   (void) ResetMagickMemory(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2559   for (i=0; i <= (ssize_t) MaxMap; i++)
2560   {
2561     if ((image->channel_mask & SyncChannels) != 0)
2562     {
2563       if (white.red != black.red)
2564         equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2565                 (map[i].red-black.red))/(white.red-black.red)));
2566       continue;
2567     }
2568     if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2569       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2570               (map[i].red-black.red))/(white.red-black.red)));
2571     if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2572       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2573               (map[i].green-black.green))/(white.green-black.green)));
2574     if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2575       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2576               (map[i].blue-black.blue))/(white.blue-black.blue)));
2577     if (((image->channel_mask & AlphaChannel) != 0) && (white.alpha != black.alpha))
2578       equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2579               (map[i].alpha-black.alpha))/(white.alpha-black.alpha)));
2580     /*
2581     if ((((channel & IndexChannel) != 0) &&
2582           (image->colorspace == CMYKColorspace)) &&
2583         (white.index != black.index))
2584       equalize_map[i].index=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2585               (map[i].index-black.index))/(white.index-black.index)));
2586     */
2587   }
2588 
2589   if (image->storage_class == PseudoClass)
2590   {
2591     /*
2592        Equalize colormap.
2593        */
2594     for (i=0; i < (ssize_t) image->colors; i++)
2595     {
2596       if ((image->channel_mask & SyncChannels) != 0)
2597       {
2598         if (white.red != black.red)
2599         {
2600           image->colormap[i].red=equalize_map[
2601             ScaleQuantumToMap(image->colormap[i].red)].red;
2602           image->colormap[i].green=equalize_map[
2603             ScaleQuantumToMap(image->colormap[i].green)].red;
2604           image->colormap[i].blue=equalize_map[
2605             ScaleQuantumToMap(image->colormap[i].blue)].red;
2606           image->colormap[i].alpha=equalize_map[
2607             ScaleQuantumToMap(image->colormap[i].alpha)].red;
2608         }
2609         continue;
2610       }
2611       if (((image->channel_mask & RedChannel) != 0) && (white.red != black.red))
2612         image->colormap[i].red=equalize_map[
2613           ScaleQuantumToMap(image->colormap[i].red)].red;
2614       if (((image->channel_mask & GreenChannel) != 0) && (white.green != black.green))
2615         image->colormap[i].green=equalize_map[
2616           ScaleQuantumToMap(image->colormap[i].green)].green;
2617       if (((image->channel_mask & BlueChannel) != 0) && (white.blue != black.blue))
2618         image->colormap[i].blue=equalize_map[
2619           ScaleQuantumToMap(image->colormap[i].blue)].blue;
2620       if (((image->channel_mask & AlphaChannel) != 0) &&
2621           (white.alpha != black.alpha))
2622         image->colormap[i].alpha=equalize_map[
2623           ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2624     }
2625   }
2626 
2627   /*
2628     Equalize image.
2629   */
2630 
2631   /* GPU can work on this again, image and equalize map as input
2632     image:        uchar4 (CLPixelPacket)
2633     equalize_map: uchar4 (PixelPacket)
2634     black, white: float4 (FloatPixelPacket) */
2635 
2636 #ifdef RECREATEBUFFER
2637   /* If the host pointer is aligned to the size of CLPixelPacket,
2638      then use the host buffer directly from the GPU; otherwise,
2639      create a buffer on the GPU and copy the data over */
2640   if (ALIGNED(inputPixels,CLPixelPacket))
2641   {
2642     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2643   }
2644   else
2645   {
2646     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2647   }
2648   /* create a CL buffer from image pixel buffer */
2649   length = image->columns * image->rows;
2650   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2651   if (clStatus != CL_SUCCESS)
2652   {
2653     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2654     goto cleanup;
2655   }
2656 #endif
2657 
2658   /* Create and initialize OpenCL buffers. */
2659   if (ALIGNED(equalize_map, PixelPacket))
2660   {
2661     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2662     hostPtr = equalize_map;
2663   }
2664   else
2665   {
2666     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2667     hostPtr = equalize_map;
2668   }
2669   /* create a CL buffer for eqaulize_map  */
2670   length = (MaxMap+1);
2671   equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2672   if (clStatus != CL_SUCCESS)
2673   {
2674     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2675     goto cleanup;
2676   }
2677 
2678   /* get the OpenCL kernel */
2679   equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2680   if (equalizeKernel == NULL)
2681   {
2682     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2683     goto cleanup;
2684   }
2685 
2686   /* set the kernel arguments */
2687   i = 0;
2688   clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2689   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2690   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2691   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&white);
2692   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(FloatPixelPacket),&black);
2693   if (clStatus != CL_SUCCESS)
2694   {
2695     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2696     goto cleanup;
2697   }
2698 
2699   /* launch the kernel */
2700   global_work_size[0] = image->columns;
2701   global_work_size[1] = image->rows;
2702 
2703   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2704 
2705   if (clStatus != CL_SUCCESS)
2706   {
2707     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2708     goto cleanup;
2709   }
2710   RecordProfileData(device,equalizeKernel,event);
2711 
2712   /* read the data back */
2713   if (ALIGNED(inputPixels,CLPixelPacket))
2714   {
2715     length = image->columns * image->rows;
2716     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2717   }
2718   else
2719   {
2720     length = image->columns * image->rows;
2721     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2722   }
2723   if (clStatus != CL_SUCCESS)
2724   {
2725     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2726     goto cleanup;
2727   }
2728 
2729   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2730 
2731 cleanup:
2732 
2733   image_view=DestroyCacheView(image_view);
2734 
2735   if (imageBuffer!=NULL)
2736     clEnv->library->clReleaseMemObject(imageBuffer);
2737   if (map!=NULL)
2738     map=(FloatPixelPacket *) RelinquishMagickMemory(map);
2739   if (equalizeMapBuffer!=NULL)
2740     clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2741   if (equalize_map!=NULL)
2742     equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2743   if (histogramBuffer!=NULL)
2744     clEnv->library->clReleaseMemObject(histogramBuffer);
2745   if (histogram!=NULL)
2746     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2747   if (histogramKernel!=NULL)
2748     ReleaseOpenCLKernel(histogramKernel);
2749   if (equalizeKernel!=NULL)
2750     ReleaseOpenCLKernel(equalizeKernel);
2751   if (queue != NULL)
2752     ReleaseOpenCLCommandQueue(device, queue);
2753   if (device != NULL)
2754     ReleaseOpenCLDevice(device);
2755 
2756   return(outputReady);
2757 }
2758 
AccelerateEqualizeImage(Image * image,ExceptionInfo * exception)2759 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2760   ExceptionInfo *exception)
2761 {
2762   MagickBooleanType
2763     status;
2764 
2765   MagickCLEnv
2766     clEnv;
2767 
2768   assert(image != NULL);
2769   assert(exception != (ExceptionInfo *) NULL);
2770 
2771   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2772       (checkHistogramCondition(image,image->intensity) == MagickFalse))
2773     return(MagickFalse);
2774 
2775   clEnv=getOpenCLEnvironment(exception);
2776   if (clEnv == (MagickCLEnv) NULL)
2777     return(MagickFalse);
2778 
2779   status=ComputeEqualizeImage(image,clEnv,exception);
2780   return(status);
2781 }
2782 
2783 /*
2784 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2785 %                                                                             %
2786 %                                                                             %
2787 %                                                                             %
2788 %     A c c e l e r a t e F u n c t i o n I m a g e                           %
2789 %                                                                             %
2790 %                                                                             %
2791 %                                                                             %
2792 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2793 */
2794 
ComputeFunctionImage(Image * image,MagickCLEnv clEnv,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2795 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2796   const MagickFunction function,const size_t number_parameters,
2797   const double *parameters,ExceptionInfo *exception)
2798 {
2799   cl_command_queue
2800     queue;
2801 
2802   cl_int
2803     status;
2804 
2805   cl_kernel
2806     functionKernel;
2807 
2808   cl_mem
2809     imageBuffer,
2810     parametersBuffer;
2811 
2812   cl_uint
2813     number_params,
2814     number_channels;
2815 
2816   float
2817     *parametersBufferPtr;
2818 
2819   MagickBooleanType
2820     outputReady;
2821 
2822   MagickCLDevice
2823     device;
2824 
2825   size_t
2826     gsize[2],
2827     i;
2828 
2829   outputReady=MagickFalse;
2830 
2831   functionKernel=NULL;
2832   parametersBuffer=NULL;
2833 
2834   device=RequestOpenCLDevice(clEnv);
2835   queue=AcquireOpenCLCommandQueue(device);
2836   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2837   if (imageBuffer == (cl_mem) NULL)
2838     goto cleanup;
2839 
2840   parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2841     sizeof(float));
2842   if (parametersBufferPtr == (float *) NULL)
2843     goto cleanup;
2844   for (i=0; i<number_parameters; i++)
2845     parametersBufferPtr[i]=(float) parameters[i];
2846   parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2847     CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2848     parametersBufferPtr);
2849   parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2850   if (parametersBuffer == (cl_mem) NULL)
2851   {
2852     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2853       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2854     goto cleanup;
2855   }
2856 
2857   functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2858   if (functionKernel == (cl_kernel) NULL)
2859   {
2860     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2861       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2862     goto cleanup;
2863   }
2864 
2865   number_channels=(cl_uint) image->number_channels;
2866   number_params=(cl_uint) number_parameters;
2867 
2868   i=0;
2869   status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2870   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2871   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2872   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2873   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2874   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
2875   if (status != CL_SUCCESS)
2876   {
2877     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2878       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2879     goto cleanup;
2880   }
2881 
2882   gsize[0]=image->columns;
2883   gsize[1]=image->rows;
2884   outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2885     gsize,(const size_t *) NULL,image,(const Image *) NULL,exception);
2886 
2887 cleanup:
2888 
2889   if (parametersBuffer != (cl_mem) NULL)
2890     ReleaseOpenCLMemObject(parametersBuffer);
2891   if (functionKernel != (cl_kernel) NULL)
2892     ReleaseOpenCLKernel(functionKernel);
2893   if (queue != (cl_command_queue) NULL)
2894     ReleaseOpenCLCommandQueue(device,queue);
2895   if (device != (MagickCLDevice) NULL)
2896     ReleaseOpenCLDevice(device);
2897   return(outputReady);
2898 }
2899 
AccelerateFunctionImage(Image * image,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2900 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2901   const MagickFunction function,const size_t number_parameters,
2902   const double *parameters,ExceptionInfo *exception)
2903 {
2904   MagickBooleanType
2905     status;
2906 
2907   MagickCLEnv
2908     clEnv;
2909 
2910   assert(image != NULL);
2911   assert(exception != (ExceptionInfo *) NULL);
2912 
2913   if (checkAccelerateCondition(image) == MagickFalse)
2914     return(MagickFalse);
2915 
2916   clEnv=getOpenCLEnvironment(exception);
2917   if (clEnv == (MagickCLEnv) NULL)
2918     return(MagickFalse);
2919 
2920   status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2921     parameters,exception);
2922   return(status);
2923 }
2924 
2925 /*
2926 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2927 %                                                                             %
2928 %                                                                             %
2929 %                                                                             %
2930 %     A c c e l e r a t e G r a y s c a l e I m a g e                         %
2931 %                                                                             %
2932 %                                                                             %
2933 %                                                                             %
2934 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2935 */
2936 
ComputeGrayscaleImage(Image * image,MagickCLEnv clEnv,const PixelIntensityMethod method,ExceptionInfo * exception)2937 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2938   const PixelIntensityMethod method,ExceptionInfo *exception)
2939 {
2940   cl_command_queue
2941     queue;
2942 
2943   cl_int
2944     status;
2945 
2946   cl_kernel
2947     grayscaleKernel;
2948 
2949   cl_mem
2950     imageBuffer;
2951 
2952   cl_uint
2953     number_channels,
2954     colorspace,
2955     intensityMethod;
2956 
2957   MagickBooleanType
2958     outputReady;
2959 
2960   MagickCLDevice
2961     device;
2962 
2963   size_t
2964     gsize[2],
2965     i;
2966 
2967   outputReady=MagickFalse;
2968   grayscaleKernel=NULL;
2969 
2970   assert(image != (Image *) NULL);
2971   assert(image->signature == MagickCoreSignature);
2972   device=RequestOpenCLDevice(clEnv);
2973   queue=AcquireOpenCLCommandQueue(device);
2974   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2975   if (imageBuffer == (cl_mem) NULL)
2976     goto cleanup;
2977 
2978   grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2979   if (grayscaleKernel == (cl_kernel) NULL)
2980   {
2981     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2982       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2983     goto cleanup;
2984   }
2985 
2986   number_channels=(cl_uint) image->number_channels;
2987   intensityMethod=(cl_uint) method;
2988   colorspace=(cl_uint) image->colorspace;
2989 
2990   i=0;
2991   status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2992   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2993   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2994   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2995   if (status != CL_SUCCESS)
2996   {
2997     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2998       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2999     goto cleanup;
3000   }
3001 
3002   gsize[0]=image->columns;
3003   gsize[1]=image->rows;
3004   outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
3005     (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
3006     exception);
3007 
3008 cleanup:
3009 
3010   if (grayscaleKernel != (cl_kernel) NULL)
3011     ReleaseOpenCLKernel(grayscaleKernel);
3012   if (queue != (cl_command_queue) NULL)
3013     ReleaseOpenCLCommandQueue(device,queue);
3014   if (device != (MagickCLDevice) NULL)
3015     ReleaseOpenCLDevice(device);
3016 
3017   return(outputReady);
3018 }
3019 
AccelerateGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)3020 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
3021   const PixelIntensityMethod method,ExceptionInfo *exception)
3022 {
3023   MagickBooleanType
3024     status;
3025 
3026   MagickCLEnv
3027     clEnv;
3028 
3029   assert(image != NULL);
3030   assert(exception != (ExceptionInfo *) NULL);
3031 
3032   if ((checkAccelerateCondition(image) == MagickFalse) ||
3033       (checkPixelIntensity(image,method) == MagickFalse))
3034     return(MagickFalse);
3035 
3036   if (image->number_channels < 3)
3037     return(MagickFalse);
3038 
3039   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
3040       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
3041       (GetPixelBlueTraits(image) == UndefinedPixelTrait))
3042     return(MagickFalse);
3043 
3044   clEnv=getOpenCLEnvironment(exception);
3045   if (clEnv == (MagickCLEnv) NULL)
3046     return(MagickFalse);
3047 
3048   status=ComputeGrayscaleImage(image,clEnv,method,exception);
3049   return(status);
3050 }
3051 
3052 /*
3053 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3054 %                                                                             %
3055 %                                                                             %
3056 %                                                                             %
3057 %     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
3058 %                                                                             %
3059 %                                                                             %
3060 %                                                                             %
3061 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3062 */
3063 
ComputeLocalContrastImage(const Image * image,MagickCLEnv clEnv,const double radius,const double strength,ExceptionInfo * exception)3064 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
3065   const double radius,const double strength,ExceptionInfo *exception)
3066 {
3067   CacheView
3068     *filteredImage_view,
3069     *image_view;
3070 
3071   cl_command_queue
3072     queue;
3073 
3074   cl_int
3075     clStatus,
3076     iRadius;
3077 
3078   cl_kernel
3079     blurRowKernel,
3080     blurColumnKernel;
3081 
3082   cl_event
3083     event;
3084 
3085   cl_mem
3086     filteredImageBuffer,
3087     imageBuffer,
3088     imageKernelBuffer,
3089     tempImageBuffer;
3090 
3091   cl_mem_flags
3092     mem_flags;
3093 
3094   const void
3095     *inputPixels;
3096 
3097   Image
3098     *filteredImage;
3099 
3100   MagickBooleanType
3101     outputReady;
3102 
3103   MagickCLDevice
3104     device;
3105 
3106   MagickSizeType
3107     length;
3108 
3109   void
3110     *filteredPixels,
3111     *hostPtr;
3112 
3113   unsigned int
3114     i,
3115     imageColumns,
3116     imageRows,
3117     passes;
3118 
3119   filteredImage = NULL;
3120   filteredImage_view = NULL;
3121   imageBuffer = NULL;
3122   filteredImageBuffer = NULL;
3123   tempImageBuffer = NULL;
3124   imageKernelBuffer = NULL;
3125   blurRowKernel = NULL;
3126   blurColumnKernel = NULL;
3127   queue = NULL;
3128   outputReady = MagickFalse;
3129 
3130   device = RequestOpenCLDevice(clEnv);
3131   queue = AcquireOpenCLCommandQueue(device);
3132 
3133   /* Create and initialize OpenCL buffers. */
3134   {
3135     image_view=AcquireAuthenticCacheView(image,exception);
3136     inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3137     if (inputPixels == (const void *) NULL)
3138     {
3139       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3140       goto cleanup;
3141     }
3142 
3143     /* If the host pointer is aligned to the size of CLPixelPacket,
3144      then use the host buffer directly from the GPU; otherwise,
3145      create a buffer on the GPU and copy the data over */
3146     if (ALIGNED(inputPixels,CLPixelPacket))
3147     {
3148       mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3149     }
3150     else
3151     {
3152       mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3153     }
3154     /* create a CL buffer from image pixel buffer */
3155     length = image->columns * image->rows;
3156     imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3157     if (clStatus != CL_SUCCESS)
3158     {
3159       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3160       goto cleanup;
3161     }
3162   }
3163 
3164   /* create output */
3165   {
3166     filteredImage = CloneImage(image,image->columns,image->rows,MagickTrue,exception);
3167     assert(filteredImage != NULL);
3168     if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3169     {
3170       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3171       goto cleanup;
3172     }
3173     filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3174     filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3175     if (filteredPixels == (void *) NULL)
3176     {
3177       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3178       goto cleanup;
3179     }
3180 
3181     if (ALIGNED(filteredPixels,CLPixelPacket))
3182     {
3183       mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3184       hostPtr = filteredPixels;
3185     }
3186     else
3187     {
3188       mem_flags = CL_MEM_WRITE_ONLY;
3189       hostPtr = NULL;
3190     }
3191 
3192     /* create a CL buffer from image pixel buffer */
3193     length = image->columns * image->rows;
3194     filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3195     if (clStatus != CL_SUCCESS)
3196     {
3197       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3198       goto cleanup;
3199     }
3200   }
3201 
3202   {
3203     /* create temp buffer */
3204     {
3205       length = image->columns * image->rows;
3206       tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3207       if (clStatus != CL_SUCCESS)
3208       {
3209         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3210         goto cleanup;
3211       }
3212     }
3213 
3214     /* get the opencl kernel */
3215     {
3216       blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3217       if (blurRowKernel == NULL)
3218       {
3219         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3220         goto cleanup;
3221       };
3222 
3223       blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3224       if (blurColumnKernel == NULL)
3225       {
3226         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3227         goto cleanup;
3228       };
3229     }
3230 
3231     {
3232       imageColumns = (unsigned int) image->columns;
3233       imageRows = (unsigned int) image->rows;
3234       iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); // Normalized radius, 100% gives blur radius of 20% of the largest dimension
3235 
3236       passes = (((1.0f * imageColumns) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3237       passes = (passes < 1) ? 1: passes;
3238 
3239       /* set the kernel arguments */
3240       i = 0;
3241       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3242       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3243       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3244       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3245       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3246       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3247 
3248       if (clStatus != CL_SUCCESS)
3249       {
3250         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3251         goto cleanup;
3252       }
3253     }
3254 
3255     /* launch the kernel */
3256     {
3257       int x;
3258       for (x = 0; x < passes; ++x) {
3259         size_t gsize[2];
3260         size_t wsize[2];
3261         size_t goffset[2];
3262 
3263         gsize[0] = 256;
3264         gsize[1] = image->rows / passes;
3265         wsize[0] = 256;
3266         wsize[1] = 1;
3267         goffset[0] = 0;
3268         goffset[1] = x * gsize[1];
3269 
3270         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3271         if (clStatus != CL_SUCCESS)
3272         {
3273           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3274           goto cleanup;
3275         }
3276         clEnv->library->clFlush(queue);
3277         RecordProfileData(device,blurRowKernel,event);
3278       }
3279     }
3280 
3281     {
3282       cl_float FStrength = strength;
3283       i = 0;
3284       clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3285       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3286       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3287       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3288       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3289       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3290       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3291 
3292       if (clStatus != CL_SUCCESS)
3293       {
3294         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3295         goto cleanup;
3296       }
3297     }
3298 
3299     /* launch the kernel */
3300     {
3301       int x;
3302       for (x = 0; x < passes; ++x) {
3303         size_t gsize[2];
3304         size_t wsize[2];
3305         size_t goffset[2];
3306 
3307         gsize[0] = ((image->columns + 3) / 4) * 4;
3308         gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3309         wsize[0] = 4;
3310         wsize[1] = 64;
3311         goffset[0] = 0;
3312         goffset[1] = x * gsize[1];
3313 
3314         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3315         if (clStatus != CL_SUCCESS)
3316         {
3317           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3318           goto cleanup;
3319         }
3320         clEnv->library->clFlush(queue);
3321         RecordProfileData(device,blurColumnKernel,event);
3322       }
3323     }
3324   }
3325 
3326   /* get result */
3327   if (ALIGNED(filteredPixels,CLPixelPacket))
3328   {
3329     length = image->columns * image->rows;
3330     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3331   }
3332   else
3333   {
3334     length = image->columns * image->rows;
3335     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3336   }
3337   if (clStatus != CL_SUCCESS)
3338   {
3339     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3340     goto cleanup;
3341   }
3342 
3343   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3344 
3345 cleanup:
3346 
3347   image_view=DestroyCacheView(image_view);
3348   if (filteredImage_view != NULL)
3349     filteredImage_view=DestroyCacheView(filteredImage_view);
3350 
3351   if (imageBuffer!=NULL)
3352     clEnv->library->clReleaseMemObject(imageBuffer);
3353   if (filteredImageBuffer!=NULL)
3354     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3355   if (tempImageBuffer!=NULL)
3356     clEnv->library->clReleaseMemObject(tempImageBuffer);
3357   if (imageKernelBuffer!=NULL)
3358     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3359   if (blurRowKernel!=NULL)
3360     ReleaseOpenCLKernel(blurRowKernel);
3361   if (blurColumnKernel!=NULL)
3362     ReleaseOpenCLKernel(blurColumnKernel);
3363   if (queue != NULL)
3364     ReleaseOpenCLCommandQueue(device, queue);
3365   if (device != NULL)
3366     ReleaseOpenCLDevice(device);
3367   if (outputReady == MagickFalse)
3368   {
3369     if (filteredImage != NULL)
3370     {
3371       DestroyImage(filteredImage);
3372       filteredImage = NULL;
3373     }
3374   }
3375 
3376   return(filteredImage);
3377 }
3378 
AccelerateLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)3379 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3380   const double radius,const double strength,ExceptionInfo *exception)
3381 {
3382   Image
3383     *filteredImage;
3384 
3385   MagickCLEnv
3386     clEnv;
3387 
3388   assert(image != NULL);
3389   assert(exception != (ExceptionInfo *) NULL);
3390 
3391   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3392     return((Image *) NULL);
3393 
3394   clEnv=getOpenCLEnvironment(exception);
3395   if (clEnv == (MagickCLEnv) NULL)
3396     return((Image *) NULL);
3397 
3398   filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3399     exception);
3400   return(filteredImage);
3401 }
3402 
3403 /*
3404 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3405 %                                                                             %
3406 %                                                                             %
3407 %                                                                             %
3408 %     A c c e l e r a t e M o d u l a t e I m a g e                           %
3409 %                                                                             %
3410 %                                                                             %
3411 %                                                                             %
3412 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3413 */
3414 
ComputeModulateImage(Image * image,MagickCLEnv clEnv,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3415 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3416   const double percent_brightness,const double percent_hue,
3417   const double percent_saturation,const ColorspaceType colorspace,
3418   ExceptionInfo *exception)
3419 {
3420   CacheView
3421     *image_view;
3422 
3423   cl_float
3424     bright,
3425     hue,
3426     saturation;
3427 
3428   cl_command_queue
3429     queue;
3430 
3431   cl_int
3432     color,
3433     clStatus;
3434 
3435   cl_kernel
3436     modulateKernel;
3437 
3438   cl_event
3439     event;
3440 
3441   cl_mem
3442     imageBuffer;
3443 
3444   cl_mem_flags
3445     mem_flags;
3446 
3447   MagickBooleanType
3448     outputReady;
3449 
3450   MagickCLDevice
3451     device;
3452 
3453   MagickSizeType
3454     length;
3455 
3456   register ssize_t
3457     i;
3458 
3459   void
3460     *inputPixels;
3461 
3462   inputPixels = NULL;
3463   imageBuffer = NULL;
3464   modulateKernel = NULL;
3465 
3466   assert(image != (Image *) NULL);
3467   assert(image->signature == MagickCoreSignature);
3468   if (image->debug != MagickFalse)
3469     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3470 
3471   /*
3472    * initialize opencl env
3473    */
3474   device = RequestOpenCLDevice(clEnv);
3475   queue = AcquireOpenCLCommandQueue(device);
3476 
3477   outputReady = MagickFalse;
3478 
3479   /* Create and initialize OpenCL buffers.
3480    inputPixels = AcquirePixelCachePixels(image, &length, exception);
3481    assume this  will get a writable image
3482    */
3483   image_view=AcquireAuthenticCacheView(image,exception);
3484   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3485   if (inputPixels == (void *) NULL)
3486   {
3487     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3488     goto cleanup;
3489   }
3490 
3491   /* If the host pointer is aligned to the size of CLPixelPacket,
3492    then use the host buffer directly from the GPU; otherwise,
3493    create a buffer on the GPU and copy the data over
3494    */
3495   if (ALIGNED(inputPixels,CLPixelPacket))
3496   {
3497     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3498   }
3499   else
3500   {
3501     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3502   }
3503   /* create a CL buffer from image pixel buffer */
3504   length = image->columns * image->rows;
3505   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3506   if (clStatus != CL_SUCCESS)
3507   {
3508     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3509     goto cleanup;
3510   }
3511 
3512   modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3513   if (modulateKernel == NULL)
3514   {
3515     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3516     goto cleanup;
3517   }
3518 
3519   bright=percent_brightness;
3520   hue=percent_hue;
3521   saturation=percent_saturation;
3522   color=colorspace;
3523 
3524   i = 0;
3525   clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3526   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3527   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3528   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3529   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3530   if (clStatus != CL_SUCCESS)
3531   {
3532     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3533     goto cleanup;
3534   }
3535 
3536   {
3537     size_t global_work_size[2];
3538     global_work_size[0] = image->columns;
3539     global_work_size[1] = image->rows;
3540     /* launch the kernel */
3541 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3542     if (clStatus != CL_SUCCESS)
3543     {
3544       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3545       goto cleanup;
3546     }
3547     RecordProfileData(device,modulateKernel,event);
3548   }
3549 
3550   if (ALIGNED(inputPixels,CLPixelPacket))
3551   {
3552     length = image->columns * image->rows;
3553     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3554   }
3555   else
3556   {
3557     length = image->columns * image->rows;
3558     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3559   }
3560   if (clStatus != CL_SUCCESS)
3561   {
3562     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3563     goto cleanup;
3564   }
3565 
3566   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3567 
3568 cleanup:
3569 
3570   image_view=DestroyCacheView(image_view);
3571 
3572   if (imageBuffer!=NULL)
3573     clEnv->library->clReleaseMemObject(imageBuffer);
3574   if (modulateKernel!=NULL)
3575     ReleaseOpenCLKernel(modulateKernel);
3576   if (queue != NULL)
3577     ReleaseOpenCLCommandQueue(device,queue);
3578   if (device != NULL)
3579     ReleaseOpenCLDevice(device);
3580 
3581   return outputReady;
3582 
3583 }
3584 
AccelerateModulateImage(Image * image,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3585 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3586   const double percent_brightness,const double percent_hue,
3587   const double percent_saturation,const ColorspaceType colorspace,
3588   ExceptionInfo *exception)
3589 {
3590   MagickBooleanType
3591     status;
3592 
3593   MagickCLEnv
3594     clEnv;
3595 
3596   assert(image != NULL);
3597   assert(exception != (ExceptionInfo *) NULL);
3598 
3599   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3600     return(MagickFalse);
3601 
3602   if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3603     return(MagickFalse);
3604 
3605   clEnv=getOpenCLEnvironment(exception);
3606   if (clEnv == (MagickCLEnv) NULL)
3607     return(MagickFalse);
3608 
3609   status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3610     percent_saturation,colorspace,exception);
3611   return(status);
3612 }
3613 
3614 /*
3615 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3616 %                                                                             %
3617 %                                                                             %
3618 %                                                                             %
3619 %     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
3620 %                                                                             %
3621 %                                                                             %
3622 %                                                                             %
3623 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3624 */
3625 
ComputeMotionBlurImage(const Image * image,MagickCLEnv clEnv,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3626 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3627   const double *kernel,const size_t width,const OffsetInfo *offset,
3628   ExceptionInfo *exception)
3629 {
3630   CacheView
3631     *filteredImage_view,
3632     *image_view;
3633 
3634   cl_command_queue
3635     queue;
3636 
3637   cl_float4
3638     biasPixel;
3639 
3640   cl_int
3641     clStatus;
3642 
3643   cl_kernel
3644     motionBlurKernel;
3645 
3646   cl_event
3647     event;
3648 
3649   cl_mem
3650     filteredImageBuffer,
3651     imageBuffer,
3652     imageKernelBuffer,
3653     offsetBuffer;
3654 
3655   cl_mem_flags
3656     mem_flags;
3657 
3658   const void
3659     *inputPixels;
3660 
3661   float
3662     *kernelBufferPtr;
3663 
3664   Image
3665     *filteredImage;
3666 
3667   int
3668     *offsetBufferPtr;
3669 
3670   MagickBooleanType
3671     outputReady;
3672 
3673   MagickCLDevice
3674     device;
3675 
3676   PixelInfo
3677     bias;
3678 
3679   MagickSizeType
3680     length;
3681 
3682   size_t
3683     global_work_size[2],
3684     local_work_size[2];
3685 
3686   unsigned int
3687     i,
3688     imageHeight,
3689     imageWidth,
3690     matte;
3691 
3692   void
3693     *filteredPixels,
3694     *hostPtr;
3695 
3696   outputReady = MagickFalse;
3697   filteredImage = NULL;
3698   filteredImage_view = NULL;
3699   imageBuffer = NULL;
3700   filteredImageBuffer = NULL;
3701   imageKernelBuffer = NULL;
3702   motionBlurKernel = NULL;
3703   queue = NULL;
3704 
3705   device = RequestOpenCLDevice(clEnv);
3706 
3707   /* Create and initialize OpenCL buffers. */
3708 
3709   image_view=AcquireAuthenticCacheView(image,exception);
3710   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3711   if (inputPixels == (const void *) NULL)
3712   {
3713     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3714       "UnableToReadPixelCache.","`%s'",image->filename);
3715     goto cleanup;
3716   }
3717 
3718   // If the host pointer is aligned to the size of CLPixelPacket,
3719   // then use the host buffer directly from the GPU; otherwise,
3720   // create a buffer on the GPU and copy the data over
3721   if (ALIGNED(inputPixels,CLPixelPacket))
3722   {
3723     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3724   }
3725   else
3726   {
3727     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3728   }
3729   // create a CL buffer from image pixel buffer
3730   length = image->columns * image->rows;
3731   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3732     length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3733   if (clStatus != CL_SUCCESS)
3734   {
3735     (void) ThrowMagickException(exception, GetMagickModule(),
3736       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3737     goto cleanup;
3738   }
3739 
3740 
3741   filteredImage = CloneImage(image,image->columns,image->rows,
3742     MagickTrue,exception);
3743   assert(filteredImage != NULL);
3744   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3745   {
3746     (void) ThrowMagickException(exception, GetMagickModule(),
3747       ResourceLimitError, "CloneImage failed.", ".");
3748     goto cleanup;
3749   }
3750   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3751   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3752   if (filteredPixels == (void *) NULL)
3753   {
3754     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3755       "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3756     goto cleanup;
3757   }
3758 
3759   if (ALIGNED(filteredPixels,CLPixelPacket))
3760   {
3761     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3762     hostPtr = filteredPixels;
3763   }
3764   else
3765   {
3766     mem_flags = CL_MEM_WRITE_ONLY;
3767     hostPtr = NULL;
3768   }
3769   // create a CL buffer from image pixel buffer
3770   length = image->columns * image->rows;
3771   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3772     length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3773   if (clStatus != CL_SUCCESS)
3774   {
3775     (void) ThrowMagickException(exception, GetMagickModule(),
3776       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3777     goto cleanup;
3778   }
3779 
3780 
3781   imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3782     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3783     &clStatus);
3784   if (clStatus != CL_SUCCESS)
3785   {
3786     (void) ThrowMagickException(exception, GetMagickModule(),
3787       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3788     goto cleanup;
3789   }
3790 
3791   queue = AcquireOpenCLCommandQueue(device);
3792   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3793     CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3794   if (clStatus != CL_SUCCESS)
3795   {
3796     (void) ThrowMagickException(exception, GetMagickModule(),
3797       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3798     goto cleanup;
3799   }
3800   for (i = 0; i < width; i++)
3801   {
3802     kernelBufferPtr[i] = (float) kernel[i];
3803   }
3804   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3805     0, NULL, NULL);
3806  if (clStatus != CL_SUCCESS)
3807   {
3808     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3809       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3810     goto cleanup;
3811   }
3812 
3813   offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3814     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3815     &clStatus);
3816   if (clStatus != CL_SUCCESS)
3817   {
3818     (void) ThrowMagickException(exception, GetMagickModule(),
3819       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3820     goto cleanup;
3821   }
3822 
3823   offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3824     CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3825   if (clStatus != CL_SUCCESS)
3826   {
3827     (void) ThrowMagickException(exception, GetMagickModule(),
3828       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3829     goto cleanup;
3830   }
3831   for (i = 0; i < width; i++)
3832   {
3833     offsetBufferPtr[2*i] = (int)offset[i].x;
3834     offsetBufferPtr[2*i+1] = (int)offset[i].y;
3835   }
3836   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3837     NULL, NULL);
3838  if (clStatus != CL_SUCCESS)
3839   {
3840     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3841       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3842     goto cleanup;
3843   }
3844 
3845 
3846  // get the OpenCL kernel
3847   motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3848   if (motionBlurKernel == NULL)
3849   {
3850     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3851       "AcquireOpenCLKernel failed.", ".");
3852     goto cleanup;
3853   }
3854 
3855   // set the kernel arguments
3856   i = 0;
3857   clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3858     (void *)&imageBuffer);
3859   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3860     (void *)&filteredImageBuffer);
3861   imageWidth = (unsigned int) image->columns;
3862   imageHeight = (unsigned int) image->rows;
3863   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3864     &imageWidth);
3865   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3866     &imageHeight);
3867   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3868     (void *)&imageKernelBuffer);
3869   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3870     &width);
3871   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3872     (void *)&offsetBuffer);
3873 
3874   GetPixelInfo(image,&bias);
3875   biasPixel.s[0] = bias.red;
3876   biasPixel.s[1] = bias.green;
3877   biasPixel.s[2] = bias.blue;
3878   biasPixel.s[3] = bias.alpha;
3879   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3880 
3881   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3882   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3883   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3884   if (clStatus != CL_SUCCESS)
3885   {
3886     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3887       "clEnv->library->clSetKernelArg failed.", ".");
3888     goto cleanup;
3889   }
3890 
3891   // launch the kernel
3892   local_work_size[0] = 16;
3893   local_work_size[1] = 16;
3894   global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3895                                 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3896   global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3897                                 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3898   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3899 	  global_work_size, local_work_size, 0, NULL, &event);
3900 
3901   if (clStatus != CL_SUCCESS)
3902   {
3903     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3904       "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3905     goto cleanup;
3906   }
3907   RecordProfileData(device,motionBlurKernel,event);
3908 
3909   if (ALIGNED(filteredPixels,CLPixelPacket))
3910   {
3911     length = image->columns * image->rows;
3912     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3913       CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3914       NULL, &clStatus);
3915   }
3916   else
3917   {
3918     length = image->columns * image->rows;
3919     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3920       length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3921   }
3922   if (clStatus != CL_SUCCESS)
3923   {
3924     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3925       "Reading output image from CL buffer failed.", ".");
3926     goto cleanup;
3927   }
3928   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3929 
3930 cleanup:
3931 
3932   image_view=DestroyCacheView(image_view);
3933   if (filteredImage_view != NULL)
3934     filteredImage_view=DestroyCacheView(filteredImage_view);
3935 
3936   if (filteredImageBuffer!=NULL)
3937     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3938   if (imageBuffer!=NULL)
3939     clEnv->library->clReleaseMemObject(imageBuffer);
3940   if (imageKernelBuffer!=NULL)
3941     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3942   if (motionBlurKernel!=NULL)
3943     ReleaseOpenCLKernel(motionBlurKernel);
3944   if (queue != NULL)
3945     ReleaseOpenCLCommandQueue(device,queue);
3946   if (device != NULL)
3947     ReleaseOpenCLDevice(device);
3948   if (outputReady == MagickFalse && filteredImage != NULL)
3949     filteredImage=DestroyImage(filteredImage);
3950 
3951   return(filteredImage);
3952 }
3953 
AccelerateMotionBlurImage(const Image * image,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3954 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3955   const double* kernel,const size_t width,const OffsetInfo *offset,
3956   ExceptionInfo *exception)
3957 {
3958   Image
3959     *filteredImage;
3960 
3961   MagickCLEnv
3962     clEnv;
3963 
3964   assert(image != NULL);
3965   assert(kernel != (double *) NULL);
3966   assert(offset != (OffsetInfo *) NULL);
3967   assert(exception != (ExceptionInfo *) NULL);
3968 
3969   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3970     return((Image *) NULL);
3971 
3972   clEnv=getOpenCLEnvironment(exception);
3973   if (clEnv == (MagickCLEnv) NULL)
3974     return((Image *) NULL);
3975 
3976   filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3977     exception);
3978   return(filteredImage);
3979 }
3980 
3981 /*
3982 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3983 %                                                                             %
3984 %                                                                             %
3985 %                                                                             %
3986 %     A c c e l e r a t e R e s i z e I m a g e                               %
3987 %                                                                             %
3988 %                                                                             %
3989 %                                                                             %
3990 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3991 */
3992 
resizeHorizontalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float xFactor,ExceptionInfo * exception)3993 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3994   cl_command_queue queue,const Image *image,Image *filteredImage,
3995   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3996   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3997   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3998   const float xFactor,ExceptionInfo *exception)
3999 {
4000   cl_kernel
4001     horizontalKernel;
4002 
4003   cl_int
4004     status;
4005 
4006   const unsigned int
4007     workgroupSize = 256;
4008 
4009   float
4010     resizeFilterScale,
4011     resizeFilterSupport,
4012     resizeFilterWindowSupport,
4013     resizeFilterBlur,
4014     scale,
4015     support;
4016 
4017   int
4018     cacheRangeStart,
4019     cacheRangeEnd,
4020     numCachedPixels,
4021     resizeFilterType,
4022     resizeWindowType;
4023 
4024   MagickBooleanType
4025     outputReady;
4026 
4027   size_t
4028     gammaAccumulatorLocalMemorySize,
4029     gsize[2],
4030     i,
4031     imageCacheLocalMemorySize,
4032     pixelAccumulatorLocalMemorySize,
4033     lsize[2],
4034     totalLocalMemorySize,
4035     weightAccumulatorLocalMemorySize;
4036 
4037   unsigned int
4038     chunkSize,
4039     pixelPerWorkgroup;
4040 
4041   horizontalKernel=NULL;
4042   outputReady=MagickFalse;
4043 
4044   /*
4045   Apply filter to resize vertically from image to resize image.
4046   */
4047   scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
4048   support=scale*GetResizeFilterSupport(resizeFilter);
4049   if (support < 0.5)
4050   {
4051     /*
4052     Support too small even for nearest neighbour: Reduce to point
4053     sampling.
4054     */
4055     support=(MagickRealType) 0.5;
4056     scale=1.0;
4057   }
4058   scale=PerceptibleReciprocal(scale);
4059 
4060   if (resizedColumns < workgroupSize)
4061   {
4062     chunkSize=32;
4063     pixelPerWorkgroup=32;
4064   }
4065   else
4066   {
4067     chunkSize=workgroupSize;
4068     pixelPerWorkgroup=workgroupSize;
4069   }
4070 
4071 DisableMSCWarning(4127)
4072   while(1)
4073 RestoreMSCWarning
4074   {
4075     /* calculate the local memory size needed per workgroup */
4076     cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
4077     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
4078       MagickEpsilon)+support+0.5);
4079     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4080     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4081       number_channels;
4082     totalLocalMemorySize=imageCacheLocalMemorySize;
4083 
4084     /* local size for the pixel accumulator */
4085     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4086     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4087 
4088     /* local memory size for the weight accumulator */
4089     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4090     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4091 
4092     /* local memory size for the gamma accumulator */
4093     if ((number_channels == 4) || (number_channels == 2))
4094       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4095     else
4096       gammaAccumulatorLocalMemorySize=sizeof(float);
4097     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4098 
4099     if (totalLocalMemorySize <= device->local_memory_size)
4100       break;
4101     else
4102     {
4103       pixelPerWorkgroup=pixelPerWorkgroup/2;
4104       chunkSize=chunkSize/2;
4105       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4106       {
4107         /* quit, fallback to CPU */
4108         goto cleanup;
4109       }
4110     }
4111   }
4112 
4113   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4114   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4115 
4116   horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4117   if (horizontalKernel == (cl_kernel) NULL)
4118   {
4119     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4120       ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4121     goto cleanup;
4122   }
4123 
4124   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4125   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4126   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4127   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4128 
4129   i=0;
4130   status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4131   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4132   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4133   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4134   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4135   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4136   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4137   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4138   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4139   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4140   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4141   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4142   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4143   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4144   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4145   status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4146   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4147   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4148   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4149   status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4150   status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4151   status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4152 
4153   if (status != CL_SUCCESS)
4154   {
4155     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4156       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4157     goto cleanup;
4158   }
4159 
4160   gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4161     workgroupSize;
4162   gsize[1]=resizedRows;
4163   lsize[0]=workgroupSize;
4164   lsize[1]=1;
4165   outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4166     (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4167 cleanup:
4168 
4169   if (horizontalKernel != (cl_kernel) NULL)
4170     ReleaseOpenCLKernel(horizontalKernel);
4171 
4172   return(outputReady);
4173 }
4174 
resizeVerticalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float yFactor,ExceptionInfo * exception)4175 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4176   cl_command_queue queue,const Image *image,Image * filteredImage,
4177   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4178   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4179   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4180   const float yFactor,ExceptionInfo *exception)
4181 {
4182   cl_kernel
4183     verticalKernel;
4184 
4185   cl_int
4186     status;
4187 
4188   const unsigned int
4189     workgroupSize = 256;
4190 
4191   float
4192     resizeFilterScale,
4193     resizeFilterSupport,
4194     resizeFilterWindowSupport,
4195     resizeFilterBlur,
4196     scale,
4197     support;
4198 
4199   int
4200     cacheRangeStart,
4201     cacheRangeEnd,
4202     numCachedPixels,
4203     resizeFilterType,
4204     resizeWindowType;
4205 
4206   MagickBooleanType
4207     outputReady;
4208 
4209   size_t
4210     gammaAccumulatorLocalMemorySize,
4211     gsize[2],
4212     i,
4213     imageCacheLocalMemorySize,
4214     pixelAccumulatorLocalMemorySize,
4215     lsize[2],
4216     totalLocalMemorySize,
4217     weightAccumulatorLocalMemorySize;
4218 
4219   unsigned int
4220     chunkSize,
4221     pixelPerWorkgroup;
4222 
4223   verticalKernel=NULL;
4224   outputReady=MagickFalse;
4225 
4226   /*
4227   Apply filter to resize vertically from image to resize image.
4228   */
4229   scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4230   support=scale*GetResizeFilterSupport(resizeFilter);
4231   if (support < 0.5)
4232   {
4233     /*
4234     Support too small even for nearest neighbour: Reduce to point
4235     sampling.
4236     */
4237     support=(MagickRealType) 0.5;
4238     scale=1.0;
4239   }
4240   scale=PerceptibleReciprocal(scale);
4241 
4242   if (resizedRows < workgroupSize)
4243   {
4244     chunkSize=32;
4245     pixelPerWorkgroup=32;
4246   }
4247   else
4248   {
4249     chunkSize=workgroupSize;
4250     pixelPerWorkgroup=workgroupSize;
4251   }
4252 
4253 DisableMSCWarning(4127)
4254   while(1)
4255 RestoreMSCWarning
4256   {
4257     /* calculate the local memory size needed per workgroup */
4258     cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4259     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4260       MagickEpsilon)+support+0.5);
4261     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4262     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4263       number_channels;
4264     totalLocalMemorySize=imageCacheLocalMemorySize;
4265 
4266     /* local size for the pixel accumulator */
4267     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4268     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4269 
4270     /* local memory size for the weight accumulator */
4271     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4272     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4273 
4274     /* local memory size for the gamma accumulator */
4275     if ((number_channels == 4) || (number_channels == 2))
4276       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4277     else
4278       gammaAccumulatorLocalMemorySize=sizeof(float);
4279     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4280 
4281     if (totalLocalMemorySize <= device->local_memory_size)
4282       break;
4283     else
4284     {
4285       pixelPerWorkgroup=pixelPerWorkgroup/2;
4286       chunkSize=chunkSize/2;
4287       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4288       {
4289         /* quit, fallback to CPU */
4290         goto cleanup;
4291       }
4292     }
4293   }
4294 
4295   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4296   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4297 
4298   verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4299   if (verticalKernel == (cl_kernel) NULL)
4300   {
4301     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4302       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4303     goto cleanup;
4304   }
4305 
4306   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4307   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4308   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4309   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4310 
4311   i=0;
4312   status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4313   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4314   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4315   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4316   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4317   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4318   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4319   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4320   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4321   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4322   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4323   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4324   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4325   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4326   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4327   status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4328   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4329   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4330   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4331   status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4332   status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4333   status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4334 
4335   if (status != CL_SUCCESS)
4336   {
4337     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4338       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4339     goto cleanup;
4340   }
4341 
4342   gsize[0]=resizedColumns;
4343   gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4344     workgroupSize;
4345   lsize[0]=1;
4346   lsize[1]=workgroupSize;
4347   outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4348     gsize,lsize,image,filteredImage,exception);
4349 
4350 cleanup:
4351 
4352   if (verticalKernel != (cl_kernel) NULL)
4353     ReleaseOpenCLKernel(verticalKernel);
4354 
4355   return(outputReady);
4356 }
4357 
ComputeResizeImage(const Image * image,MagickCLEnv clEnv,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4358 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4359   const size_t resizedColumns,const size_t resizedRows,
4360   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4361 {
4362   cl_command_queue
4363     queue;
4364 
4365   cl_mem
4366     cubicCoefficientsBuffer,
4367     filteredImageBuffer,
4368     imageBuffer,
4369     tempImageBuffer;
4370 
4371   cl_uint
4372     number_channels;
4373 
4374   const double
4375     *resizeFilterCoefficient;
4376 
4377   float
4378     coefficientBuffer[7],
4379     xFactor,
4380     yFactor;
4381 
4382   MagickBooleanType
4383     outputReady;
4384 
4385   MagickCLDevice
4386     device;
4387 
4388   MagickSizeType
4389     length;
4390 
4391   Image
4392     *filteredImage;
4393 
4394   size_t
4395     i;
4396 
4397   filteredImage=NULL;
4398   tempImageBuffer=NULL;
4399   cubicCoefficientsBuffer=NULL;
4400   outputReady=MagickFalse;
4401 
4402   device=RequestOpenCLDevice(clEnv);
4403   queue=AcquireOpenCLCommandQueue(device);
4404   filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4405     exception);
4406   if (filteredImage == (Image *) NULL)
4407     goto cleanup;
4408   if (filteredImage->number_channels != image->number_channels)
4409     goto cleanup;
4410   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4411   if (imageBuffer == (cl_mem) NULL)
4412     goto cleanup;
4413   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4414   if (filteredImageBuffer == (cl_mem) NULL)
4415     goto cleanup;
4416 
4417   resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4418   for (i = 0; i < 7; i++)
4419     coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4420   cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4421     CL_MEM_READ_ONLY,7*sizeof(*resizeFilterCoefficient),&coefficientBuffer);
4422   if (cubicCoefficientsBuffer == (cl_mem) NULL)
4423   {
4424     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4425       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4426     goto cleanup;
4427   }
4428 
4429   number_channels=(cl_uint) image->number_channels;
4430   xFactor=(float) resizedColumns/(float) image->columns;
4431   yFactor=(float) resizedRows/(float) image->rows;
4432   if (xFactor > yFactor)
4433   {
4434     length=resizedColumns*image->rows*number_channels;
4435     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4436       sizeof(CLQuantum),(void *) NULL);
4437     if (tempImageBuffer == (cl_mem) NULL)
4438     {
4439       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4440         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4441       goto cleanup;
4442     }
4443 
4444     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4445       imageBuffer,number_channels,(cl_uint) image->columns,
4446       (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4447       (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4448       exception);
4449     if (outputReady == MagickFalse)
4450       goto cleanup;
4451 
4452     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4453       tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4454       (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4455       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4456       exception);
4457     if (outputReady == MagickFalse)
4458       goto cleanup;
4459   }
4460   else
4461   {
4462     length=image->columns*resizedRows*number_channels;
4463     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4464       sizeof(CLQuantum),(void *) NULL);
4465     if (tempImageBuffer == (cl_mem) NULL)
4466     {
4467       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4468         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4469       goto cleanup;
4470     }
4471 
4472     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4473       imageBuffer,number_channels,(cl_uint) image->columns,
4474       (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4475       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4476       exception);
4477     if (outputReady == MagickFalse)
4478       goto cleanup;
4479 
4480     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4481       tempImageBuffer,number_channels,(cl_uint) image->columns,
4482       (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4483       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4484       exception);
4485     if (outputReady == MagickFalse)
4486       goto cleanup;
4487   }
4488 
4489 cleanup:
4490 
4491   if (tempImageBuffer != (cl_mem) NULL)
4492     ReleaseOpenCLMemObject(tempImageBuffer);
4493   if (cubicCoefficientsBuffer != (cl_mem) NULL)
4494     ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4495   if (queue != (cl_command_queue) NULL)
4496     ReleaseOpenCLCommandQueue(device,queue);
4497   if (device != (MagickCLDevice) NULL)
4498     ReleaseOpenCLDevice(device);
4499   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4500     filteredImage=DestroyImage(filteredImage);
4501 
4502   return(filteredImage);
4503 }
4504 
gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)4505 static MagickBooleanType gpuSupportedResizeWeighting(
4506   ResizeWeightingFunctionType f)
4507 {
4508   unsigned int
4509     i;
4510 
4511   for (i = 0; ;i++)
4512   {
4513     if (supportedResizeWeighting[i] == LastWeightingFunction)
4514       break;
4515     if (supportedResizeWeighting[i] == f)
4516       return(MagickTrue);
4517   }
4518   return(MagickFalse);
4519 }
4520 
AccelerateResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4521 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4522   const size_t resizedColumns,const size_t resizedRows,
4523   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4524 {
4525   Image
4526     *filteredImage;
4527 
4528   MagickCLEnv
4529     clEnv;
4530 
4531   assert(image != NULL);
4532   assert(exception != (ExceptionInfo *) NULL);
4533 
4534   if (checkAccelerateCondition(image) == MagickFalse)
4535     return((Image *) NULL);
4536 
4537   if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4538          resizeFilter)) == MagickFalse) ||
4539       (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4540          resizeFilter)) == MagickFalse))
4541     return((Image *) NULL);
4542 
4543   clEnv=getOpenCLEnvironment(exception);
4544   if (clEnv == (MagickCLEnv) NULL)
4545     return((Image *) NULL);
4546 
4547   filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4548     resizeFilter,exception);
4549   return(filteredImage);
4550 }
4551 
4552 /*
4553 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4554 %                                                                             %
4555 %                                                                             %
4556 %                                                                             %
4557 %     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
4558 %                                                                             %
4559 %                                                                             %
4560 %                                                                             %
4561 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4562 */
4563 
ComputeRotationalBlurImage(const Image * image,MagickCLEnv clEnv,const double angle,ExceptionInfo * exception)4564 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4565   const double angle,ExceptionInfo *exception)
4566 {
4567   cl_command_queue
4568     queue;
4569 
4570   cl_float2
4571     blurCenter;
4572 
4573   cl_int
4574     status;
4575 
4576   cl_mem
4577     cosThetaBuffer,
4578     filteredImageBuffer,
4579     imageBuffer,
4580     sinThetaBuffer;
4581 
4582   cl_kernel
4583     rotationalBlurKernel;
4584 
4585   cl_uint
4586     cossin_theta_size,
4587     number_channels;
4588 
4589   float
4590     blurRadius,
4591     *cosThetaPtr,
4592     offset,
4593     *sinThetaPtr,
4594     theta;
4595 
4596   Image
4597     *filteredImage;
4598 
4599   MagickBooleanType
4600     outputReady;
4601 
4602   MagickCLDevice
4603     device;
4604 
4605   size_t
4606     gsize[2],
4607     i;
4608 
4609   filteredImage=NULL;
4610   sinThetaBuffer=NULL;
4611   cosThetaBuffer=NULL;
4612   rotationalBlurKernel=NULL;
4613   outputReady=MagickFalse;
4614 
4615   device=RequestOpenCLDevice(clEnv);
4616   queue=AcquireOpenCLCommandQueue(device);
4617   filteredImage=cloneImage(image,exception);
4618   if (filteredImage == (Image *) NULL)
4619     goto cleanup;
4620   if (filteredImage->number_channels != image->number_channels)
4621     goto cleanup;
4622   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4623   if (imageBuffer == (cl_mem) NULL)
4624     goto cleanup;
4625   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4626   if (filteredImageBuffer == (cl_mem) NULL)
4627     goto cleanup;
4628 
4629   blurCenter.x=(float) (image->columns-1)/2.0;
4630   blurCenter.y=(float) (image->rows-1)/2.0;
4631   blurRadius=hypot(blurCenter.x,blurCenter.y);
4632   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4633     (double) blurRadius)+2UL);
4634 
4635   cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4636   if (cosThetaPtr == (float *) NULL)
4637     goto cleanup;
4638   sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4639   if (sinThetaPtr == (float *) NULL)
4640   {
4641     cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4642     goto cleanup;
4643   }
4644 
4645   theta=DegreesToRadians(angle)/(MagickRealType) (cossin_theta_size-1);
4646   offset=theta*(MagickRealType) (cossin_theta_size-1)/2.0;
4647   for (i=0; i < (ssize_t) cossin_theta_size; i++)
4648   {
4649     cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4650     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4651   }
4652 
4653   sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4654     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4655   sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4656   cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4657     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4658   cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4659   if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4660   {
4661     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4662       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4663     goto cleanup;
4664   }
4665 
4666   rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4667   if (rotationalBlurKernel == (cl_kernel) NULL)
4668   {
4669     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4670       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4671     goto cleanup;
4672   }
4673 
4674   number_channels=(cl_uint) image->number_channels;
4675 
4676   i=0;
4677   status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4678   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4679   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4680   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4681   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4682   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4683   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4684   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4685   if (status != CL_SUCCESS)
4686   {
4687     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4688       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4689     goto cleanup;
4690   }
4691 
4692   gsize[0]=image->columns;
4693   gsize[1]=image->rows;
4694   outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4695     (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4696     exception);
4697 
4698 cleanup:
4699 
4700   if (sinThetaBuffer != (cl_mem) NULL)
4701     ReleaseOpenCLMemObject(sinThetaBuffer);
4702   if (cosThetaBuffer != (cl_mem) NULL)
4703     ReleaseOpenCLMemObject(cosThetaBuffer);
4704   if (rotationalBlurKernel != (cl_kernel) NULL)
4705     ReleaseOpenCLKernel(rotationalBlurKernel);
4706   if (queue != (cl_command_queue) NULL)
4707     ReleaseOpenCLCommandQueue(device,queue);
4708   if (device != (MagickCLDevice) NULL)
4709     ReleaseOpenCLDevice(device);
4710   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4711     filteredImage=DestroyImage(filteredImage);
4712 
4713   return(filteredImage);
4714 }
4715 
AccelerateRotationalBlurImage(const Image * image,const double angle,ExceptionInfo * exception)4716 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4717   const double angle,ExceptionInfo *exception)
4718 {
4719   Image
4720     *filteredImage;
4721 
4722   MagickCLEnv
4723     clEnv;
4724 
4725   assert(image != NULL);
4726   assert(exception != (ExceptionInfo *) NULL);
4727 
4728   if (checkAccelerateCondition(image) == MagickFalse)
4729     return((Image *) NULL);
4730 
4731   clEnv=getOpenCLEnvironment(exception);
4732   if (clEnv == (MagickCLEnv) NULL)
4733     return((Image *) NULL);
4734 
4735   filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4736   return filteredImage;
4737 }
4738 
4739 /*
4740 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4741 %                                                                             %
4742 %                                                                             %
4743 %                                                                             %
4744 %     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
4745 %                                                                             %
4746 %                                                                             %
4747 %                                                                             %
4748 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4749 */
4750 
ComputeUnsharpMaskImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4751 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4752   const double radius,const double sigma,const double gain,
4753   const double threshold,ExceptionInfo *exception)
4754 {
4755   cl_command_queue
4756     queue;
4757 
4758   cl_int
4759     status;
4760 
4761   cl_kernel
4762     blurRowKernel,
4763     unsharpMaskBlurColumnKernel;
4764 
4765   cl_mem
4766     filteredImageBuffer,
4767     imageBuffer,
4768     imageKernelBuffer,
4769     tempImageBuffer;
4770 
4771   cl_uint
4772     imageColumns,
4773     imageRows,
4774     kernelWidth,
4775     number_channels;
4776 
4777   float
4778     fGain,
4779     fThreshold;
4780 
4781   Image
4782     *filteredImage;
4783 
4784   int
4785     chunkSize;
4786 
4787   MagickBooleanType
4788     outputReady;
4789 
4790   MagickCLDevice
4791     device;
4792 
4793   MagickSizeType
4794     length;
4795 
4796   size_t
4797     gsize[2],
4798     i,
4799     lsize[2];
4800 
4801   filteredImage=NULL;
4802   tempImageBuffer=NULL;
4803   imageKernelBuffer=NULL;
4804   blurRowKernel=NULL;
4805   unsharpMaskBlurColumnKernel=NULL;
4806   outputReady=MagickFalse;
4807 
4808   device=RequestOpenCLDevice(clEnv);
4809   queue=AcquireOpenCLCommandQueue(device);
4810   filteredImage=cloneImage(image,exception);
4811   if (filteredImage == (Image *) NULL)
4812     goto cleanup;
4813   if (filteredImage->number_channels != image->number_channels)
4814     goto cleanup;
4815   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4816   if (imageBuffer == (cl_mem) NULL)
4817     goto cleanup;
4818   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4819   if (filteredImageBuffer == (cl_mem) NULL)
4820     goto cleanup;
4821 
4822   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4823     exception);
4824 
4825   length=image->columns*image->rows;
4826   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4827     sizeof(cl_float4),NULL);
4828   if (tempImageBuffer == (cl_mem) NULL)
4829   {
4830     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4831       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4832     goto cleanup;
4833   }
4834 
4835   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4836   if (blurRowKernel == (cl_kernel) NULL)
4837   {
4838     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4839       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4840     goto cleanup;
4841   }
4842 
4843   unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4844     "UnsharpMaskBlurColumn");
4845   if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4846   {
4847     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4848       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4849     goto cleanup;
4850   }
4851 
4852   number_channels=(cl_uint) image->number_channels;
4853   imageColumns=(cl_uint) image->columns;
4854   imageRows=(cl_uint) image->rows;
4855 
4856   chunkSize = 256;
4857 
4858   i=0;
4859   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4860   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4861   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4862   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4863   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4864   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4865   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4866   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4867   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4868   if (status != CL_SUCCESS)
4869   {
4870     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4871       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4872     goto cleanup;
4873   }
4874 
4875   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4876   gsize[1]=image->rows;
4877   lsize[0]=chunkSize;
4878   lsize[1]=1;
4879   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4880     (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4881 
4882   chunkSize=256;
4883   fGain=(float) gain;
4884   fThreshold=(float) threshold;
4885 
4886   i=0;
4887   status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4888   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4889   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4890   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4891   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4892   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4893   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4894   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4895   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4896   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4897   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4898   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4899   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4900   if (status != CL_SUCCESS)
4901   {
4902     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4903       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4904     goto cleanup;
4905   }
4906 
4907   gsize[0]=image->columns;
4908   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4909   lsize[0]=1;
4910   lsize[1]=chunkSize;
4911   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4912     (const size_t *) NULL,gsize,lsize,image,filteredImage,exception);
4913 
4914 cleanup:
4915 
4916   if (tempImageBuffer != (cl_mem) NULL)
4917     ReleaseOpenCLMemObject(tempImageBuffer);
4918   if (imageKernelBuffer != (cl_mem) NULL)
4919     ReleaseOpenCLMemObject(imageKernelBuffer);
4920   if (blurRowKernel != (cl_kernel) NULL)
4921     ReleaseOpenCLKernel(blurRowKernel);
4922   if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4923     ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4924   if (queue != (cl_command_queue) NULL)
4925     ReleaseOpenCLCommandQueue(device,queue);
4926   if (device != (MagickCLDevice) NULL)
4927     ReleaseOpenCLDevice(device);
4928   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4929     filteredImage=DestroyImage(filteredImage);
4930 
4931   return(filteredImage);
4932 }
4933 
ComputeUnsharpMaskImageSingle(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4934 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4935   MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4936   const double threshold,ExceptionInfo *exception)
4937 {
4938   cl_command_queue
4939     queue;
4940 
4941   cl_int
4942     status;
4943 
4944   cl_kernel
4945     unsharpMaskKernel;
4946 
4947   cl_mem
4948     filteredImageBuffer,
4949     imageBuffer,
4950     imageKernelBuffer;
4951 
4952   cl_uint
4953     imageColumns,
4954     imageRows,
4955     kernelWidth,
4956     number_channels;
4957 
4958   float
4959     fGain,
4960     fThreshold;
4961 
4962   Image
4963     *filteredImage;
4964 
4965   MagickBooleanType
4966     outputReady;
4967 
4968   MagickCLDevice
4969     device;
4970 
4971   size_t
4972     gsize[2],
4973     i,
4974     lsize[2];
4975 
4976   filteredImage=NULL;
4977   imageKernelBuffer=NULL;
4978   unsharpMaskKernel=NULL;
4979   outputReady=MagickFalse;
4980 
4981   device=RequestOpenCLDevice(clEnv);
4982   queue=AcquireOpenCLCommandQueue(device);
4983   filteredImage=cloneImage(image,exception);
4984   if (filteredImage == (Image *) NULL)
4985     goto cleanup;
4986   if (filteredImage->number_channels != image->number_channels)
4987     goto cleanup;
4988   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4989   if (imageBuffer == (cl_mem) NULL)
4990     goto cleanup;
4991   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4992   if (filteredImageBuffer == (cl_mem) NULL)
4993     goto cleanup;
4994 
4995   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4996     exception);
4997 
4998   unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4999   if (unsharpMaskKernel == NULL)
5000   {
5001     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5002       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5003     goto cleanup;
5004   }
5005 
5006   imageColumns=(cl_uint) image->columns;
5007   imageRows=(cl_uint) image->rows;
5008   number_channels=(cl_uint) image->number_channels;
5009   fGain=(float) gain;
5010   fThreshold=(float) threshold;
5011 
5012   i=0;
5013   status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5014   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5015   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
5016   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
5017   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
5018   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
5019   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
5020   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
5021   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
5022   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
5023   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5024   if (status != CL_SUCCESS)
5025   {
5026     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5027       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5028     goto cleanup;
5029   }
5030 
5031   gsize[0]=((image->columns + 7) / 8)*8;
5032   gsize[1]=((image->rows + 31) / 32)*32;
5033   lsize[0]=8;
5034   lsize[1]=32;
5035   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
5036     gsize,lsize,image,filteredImage,exception);
5037 
5038 cleanup:
5039 
5040   if (imageKernelBuffer != (cl_mem) NULL)
5041     ReleaseOpenCLMemObject(imageKernelBuffer);
5042   if (unsharpMaskKernel != (cl_kernel) NULL)
5043     ReleaseOpenCLKernel(unsharpMaskKernel);
5044   if (queue != (cl_command_queue) NULL)
5045     ReleaseOpenCLCommandQueue(device,queue);
5046   if (device != (MagickCLDevice) NULL)
5047     ReleaseOpenCLDevice(device);
5048   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5049     filteredImage=DestroyImage(filteredImage);
5050 
5051   return(filteredImage);
5052 }
5053 
AccelerateUnsharpMaskImage(const Image * image,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)5054 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5055   const double radius,const double sigma,const double gain,
5056   const double threshold,ExceptionInfo *exception)
5057 {
5058   Image
5059     *filteredImage;
5060 
5061   MagickCLEnv
5062     clEnv;
5063 
5064   assert(image != NULL);
5065   assert(exception != (ExceptionInfo *) NULL);
5066 
5067   if (checkAccelerateCondition(image) == MagickFalse)
5068     return((Image *) NULL);
5069 
5070   clEnv=getOpenCLEnvironment(exception);
5071   if (clEnv == (MagickCLEnv) NULL)
5072     return((Image *) NULL);
5073 
5074   if (radius < 12.1)
5075     filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5076       threshold,exception);
5077   else
5078     filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5079       threshold,exception);
5080   return(filteredImage);
5081 }
5082 
ComputeWaveletDenoiseImage(const Image * image,MagickCLEnv clEnv,const double threshold,ExceptionInfo * exception)5083 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5084   const double threshold,ExceptionInfo *exception)
5085 {
5086   cl_command_queue
5087     queue;
5088 
5089   const cl_int
5090     PASSES=5;
5091 
5092   const int
5093     TILESIZE=64,
5094     PAD=1<<(PASSES-1),
5095     SIZE=TILESIZE-2*PAD;
5096 
5097   cl_float
5098     thresh;
5099 
5100   cl_int
5101     status;
5102 
5103   cl_kernel
5104     denoiseKernel;
5105 
5106   cl_mem
5107     filteredImageBuffer,
5108     imageBuffer;
5109 
5110   cl_uint
5111     number_channels,
5112     width,
5113     height,
5114     max_channels;
5115 
5116   Image
5117     *filteredImage;
5118 
5119   MagickBooleanType
5120     outputReady;
5121 
5122   MagickCLDevice
5123     device;
5124 
5125   size_t
5126     gsize[2],
5127     i,
5128     lsize[2];
5129 
5130   filteredImage=NULL;
5131   denoiseKernel=NULL;
5132   outputReady=MagickFalse;
5133 
5134   device=RequestOpenCLDevice(clEnv);
5135   queue=AcquireOpenCLCommandQueue(device);
5136   filteredImage=CloneImage(image,image->columns,image->rows,MagickTrue,
5137     exception);
5138   if (filteredImage == (Image *) NULL)
5139     goto cleanup;
5140   if (filteredImage->number_channels != image->number_channels)
5141     goto cleanup;
5142   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5143   if (imageBuffer == (cl_mem) NULL)
5144     goto cleanup;
5145   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5146   if (filteredImageBuffer == (cl_mem) NULL)
5147     goto cleanup;
5148 
5149   denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5150   if (denoiseKernel == (cl_kernel) NULL)
5151   {
5152     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5153       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5154     goto cleanup;
5155   }
5156 
5157   number_channels=(cl_uint)image->number_channels;
5158   width=(cl_uint)image->columns;
5159   height=(cl_uint)image->rows;
5160   max_channels=number_channels;
5161   if ((max_channels == 4) || (max_channels == 2))
5162     max_channels=max_channels-1;
5163   thresh=threshold;
5164 
5165   i=0;
5166   status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5167   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5168   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5169   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5170   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5171   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5172   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5173   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5174   if (status != CL_SUCCESS)
5175   {
5176     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5177       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5178     goto cleanup;
5179   }
5180 
5181   gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5182   gsize[1]=((height+(SIZE-1))/SIZE)*4;
5183   lsize[0]=TILESIZE;
5184   lsize[1]=4;
5185   outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,(const size_t *) NULL,
5186     gsize,lsize,image,filteredImage,exception);
5187 
5188 cleanup:
5189 
5190   if (denoiseKernel != (cl_kernel) NULL)
5191     ReleaseOpenCLKernel(denoiseKernel);
5192   if (queue != (cl_command_queue) NULL)
5193     ReleaseOpenCLCommandQueue(device,queue);
5194   if (device != (MagickCLDevice) NULL)
5195     ReleaseOpenCLDevice(device);
5196   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5197     filteredImage=DestroyImage(filteredImage);
5198 
5199   return(filteredImage);
5200 }
5201 
AccelerateWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5202 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5203   const double threshold,ExceptionInfo *exception)
5204 {
5205   Image
5206     *filteredImage;
5207 
5208   MagickCLEnv
5209     clEnv;
5210 
5211   assert(image != NULL);
5212   assert(exception != (ExceptionInfo *)NULL);
5213 
5214   if (checkAccelerateCondition(image) == MagickFalse)
5215     return((Image *) NULL);
5216 
5217   clEnv=getOpenCLEnvironment(exception);
5218   if (clEnv == (MagickCLEnv) NULL)
5219     return((Image *) NULL);
5220 
5221   filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5222 
5223   return(filteredImage);
5224 }
5225 #endif /* MAGICKCORE_OPENCL_SUPPORT */
5226