1 /*
2 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3 %                                                                             %
4 %                                                                             %
5 %                                                                             %
6 %     AAA     CCCC    CCCC  EEEEE  L      EEEEE  RRRR    AAA   TTTTT  EEEEE   %
7 %    A   A   C       C      E      L      E      R   R  A   A    T    E       %
8 %    AAAAA   C       C      EEE    L      EEE    RRRR   AAAAA    T    EEE     %
9 %    A   A   C       C      E      L      E      R R    A   A    T    E       %
10 %    A   A    CCCC    CCCC  EEEEE  LLLLL  EEEEE  R  R   A   A    T    EEEEE   %
11 %                                                                             %
12 %                                                                             %
13 %                       MagickCore Acceleration Methods                       %
14 %                                                                             %
15 %                              Software Design                                %
16 %                                  Cristy                                     %
17 %                               SiuChi Chan                                   %
18 %                              Guansong Zhang                                 %
19 %                               January 2010                                  %
20 %                               Dirk Lemstra                                  %
21 %                                April 2016                                   %
22 %                                                                             %
23 %                                                                             %
24 %  Copyright 1999-2019 ImageMagick Studio LLC, a non-profit organization      %
25 %  dedicated to making software imaging solutions freely available.           %
26 %                                                                             %
27 %  You may not use this file except in compliance with the License.  You may  %
28 %  obtain a copy of the License at                                            %
29 %                                                                             %
30 %    https://imagemagick.org/script/license.php                               %
31 %                                                                             %
32 %  Unless required by applicable law or agreed to in writing, software        %
33 %  distributed under the License is distributed on an "AS IS" BASIS,          %
34 %  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.   %
35 %  See the License for the specific language governing permissions and        %
36 %  limitations under the License.                                             %
37 %                                                                             %
38 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
39 */
40 
41 /*
42 Include declarations.
43 */
44 #include "MagickCore/studio.h"
45 #include "MagickCore/accelerate-private.h"
46 #include "MagickCore/accelerate-kernels-private.h"
47 #include "MagickCore/artifact.h"
48 #include "MagickCore/cache.h"
49 #include "MagickCore/cache-private.h"
50 #include "MagickCore/cache-view.h"
51 #include "MagickCore/color-private.h"
52 #include "MagickCore/delegate-private.h"
53 #include "MagickCore/enhance.h"
54 #include "MagickCore/exception.h"
55 #include "MagickCore/exception-private.h"
56 #include "MagickCore/gem.h"
57 #include "MagickCore/image.h"
58 #include "MagickCore/image-private.h"
59 #include "MagickCore/linked-list.h"
60 #include "MagickCore/list.h"
61 #include "MagickCore/memory_.h"
62 #include "MagickCore/monitor-private.h"
63 #include "MagickCore/opencl.h"
64 #include "MagickCore/opencl-private.h"
65 #include "MagickCore/option.h"
66 #include "MagickCore/pixel-accessor.h"
67 #include "MagickCore/pixel-private.h"
68 #include "MagickCore/prepress.h"
69 #include "MagickCore/quantize.h"
70 #include "MagickCore/quantum-private.h"
71 #include "MagickCore/random_.h"
72 #include "MagickCore/random-private.h"
73 #include "MagickCore/registry.h"
74 #include "MagickCore/resize.h"
75 #include "MagickCore/resize-private.h"
76 #include "MagickCore/semaphore.h"
77 #include "MagickCore/splay-tree.h"
78 #include "MagickCore/statistic.h"
79 #include "MagickCore/string_.h"
80 #include "MagickCore/string-private.h"
81 #include "MagickCore/token.h"
82 
83 #define MAGICK_MAX(x,y) (((x) >= (y))?(x):(y))
84 #define MAGICK_MIN(x,y) (((x) <= (y))?(x):(y))
85 
86 #if defined(MAGICKCORE_OPENCL_SUPPORT)
87 
88 /*
89   Define declarations.
90 */
91 #define ALIGNED(pointer,type) ((((size_t)(pointer)) & (sizeof(type)-1)) == 0)
92 
93 /*
94   Static declarations.
95 */
96 static const ResizeWeightingFunctionType supportedResizeWeighting[] =
97 {
98   BoxWeightingFunction,
99   TriangleWeightingFunction,
100   HannWeightingFunction,
101   HammingWeightingFunction,
102   BlackmanWeightingFunction,
103   CubicBCWeightingFunction,
104   SincWeightingFunction,
105   SincFastWeightingFunction,
106   LastWeightingFunction
107 };
108 
109 /*
110   Helper functions.
111 */
checkAccelerateCondition(const Image * image)112 static MagickBooleanType checkAccelerateCondition(const Image* image)
113 {
114   /* only direct class images are supported */
115   if (image->storage_class != DirectClass)
116     return(MagickFalse);
117 
118   /* check if the image's colorspace is supported */
119   if (image->colorspace != RGBColorspace &&
120       image->colorspace != sRGBColorspace &&
121       image->colorspace != LinearGRAYColorspace &&
122       image->colorspace != GRAYColorspace)
123     return(MagickFalse);
124 
125   /* check if the virtual pixel method is compatible with the OpenCL implementation */
126   if ((GetImageVirtualPixelMethod(image) != UndefinedVirtualPixelMethod) &&
127       (GetImageVirtualPixelMethod(image) != EdgeVirtualPixelMethod))
128     return(MagickFalse);
129 
130   /* check if the image has mask */
131   if (((image->channels & ReadMaskChannel) != 0) ||
132       ((image->channels & WriteMaskChannel) != 0) ||
133       ((image->channels & CompositeMaskChannel) != 0))
134     return(MagickFalse);
135 
136   if (image->number_channels > 4)
137     return(MagickFalse);
138 
139   /* check if pixel order is R */
140   if (GetPixelChannelOffset(image,RedPixelChannel) != 0)
141     return(MagickFalse);
142 
143   if (image->number_channels == 1)
144     return(MagickTrue);
145 
146   /* check if pixel order is RA */
147   if ((image->number_channels == 2) &&
148       (GetPixelChannelOffset(image,AlphaPixelChannel) == 1))
149     return(MagickTrue);
150 
151   if (image->number_channels == 2)
152     return(MagickFalse);
153 
154   /* check if pixel order is RGB */
155   if ((GetPixelChannelOffset(image,GreenPixelChannel) != 1) ||
156       (GetPixelChannelOffset(image,BluePixelChannel) != 2))
157     return(MagickFalse);
158 
159   if (image->number_channels == 3)
160     return(MagickTrue);
161 
162   /* check if pixel order is RGBA */
163   if (GetPixelChannelOffset(image,AlphaPixelChannel) != 3)
164     return(MagickFalse);
165 
166   return(MagickTrue);
167 }
168 
checkAccelerateConditionRGBA(const Image * image)169 static MagickBooleanType checkAccelerateConditionRGBA(const Image* image)
170 {
171   if (checkAccelerateCondition(image) == MagickFalse)
172     return(MagickFalse);
173 
174   /* the order will be RGBA if the image has 4 channels */
175   if (image->number_channels != 4)
176     return(MagickFalse);
177 
178   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
179       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
180       (GetPixelBlueTraits(image) == UndefinedPixelTrait) ||
181       (GetPixelAlphaTraits(image) == UndefinedPixelTrait))
182     return(MagickFalse);
183 
184   return(MagickTrue);
185 }
186 
checkPixelIntensity(const Image * image,const PixelIntensityMethod method)187 static MagickBooleanType checkPixelIntensity(const Image *image,
188   const PixelIntensityMethod method)
189 {
190   /* EncodePixelGamma and DecodePixelGamma are not supported */
191   if ((method == Rec601LumaPixelIntensityMethod) ||
192       (method == Rec709LumaPixelIntensityMethod))
193     {
194       if (image->colorspace == RGBColorspace)
195         return(MagickFalse);
196     }
197 
198   if ((method == Rec601LuminancePixelIntensityMethod) ||
199       (method == Rec709LuminancePixelIntensityMethod))
200     {
201       if (image->colorspace == sRGBColorspace)
202         return(MagickFalse);
203     }
204 
205   return(MagickTrue);
206 }
207 
checkHistogramCondition(const Image * image,const PixelIntensityMethod method)208 static MagickBooleanType checkHistogramCondition(const Image *image,
209   const PixelIntensityMethod method)
210 {
211   /* ensure this is the only pass get in for now. */
212   if ((image->channel_mask & SyncChannels) == 0)
213     return MagickFalse;
214 
215   return(checkPixelIntensity(image,method));
216 }
217 
getOpenCLEnvironment(ExceptionInfo * exception)218 static MagickCLEnv getOpenCLEnvironment(ExceptionInfo* exception)
219 {
220   MagickCLEnv
221     clEnv;
222 
223   clEnv=GetCurrentOpenCLEnv();
224   if (clEnv == (MagickCLEnv) NULL)
225     return((MagickCLEnv) NULL);
226 
227   if (clEnv->enabled == MagickFalse)
228     return((MagickCLEnv) NULL);
229 
230   if (InitializeOpenCL(clEnv,exception) == MagickFalse)
231     return((MagickCLEnv) NULL);
232 
233   return(clEnv);
234 }
235 
cloneImage(const Image * image,ExceptionInfo * exception)236 static Image *cloneImage(const Image* image,ExceptionInfo *exception)
237 {
238   Image
239     *clone;
240 
241   if (((image->channel_mask & RedChannel) != 0) &&
242       ((image->channel_mask & GreenChannel) != 0) &&
243       ((image->channel_mask & BlueChannel) != 0) &&
244       ((image->channel_mask & AlphaChannel) != 0))
245     clone=CloneImage(image,0,0,MagickTrue,exception);
246   else
247     {
248       clone=CloneImage(image,0,0,MagickTrue,exception);
249       if (clone != (Image *) NULL)
250         SyncImagePixelCache(clone,exception);
251     }
252   return(clone);
253 }
254 
255 /* pad the global workgroup size to the next multiple of
256    the local workgroup size */
padGlobalWorkgroupSizeToLocalWorkgroupSize(const unsigned int orgGlobalSize,const unsigned int localGroupSize)257 inline static unsigned int padGlobalWorkgroupSizeToLocalWorkgroupSize(
258   const unsigned int orgGlobalSize,const unsigned int localGroupSize)
259 {
260   return ((orgGlobalSize+(localGroupSize-1))/localGroupSize*localGroupSize);
261 }
262 
createKernelInfo(MagickCLDevice device,const double radius,const double sigma,cl_uint * width,ExceptionInfo * exception)263 static cl_mem createKernelInfo(MagickCLDevice device,const double radius,
264   const double sigma,cl_uint *width,ExceptionInfo *exception)
265 {
266   char
267     geometry[MagickPathExtent];
268 
269   cl_mem
270     imageKernelBuffer;
271 
272   float
273     *kernelBufferPtr;
274 
275   KernelInfo
276     *kernel;
277 
278   ssize_t
279     i;
280 
281   (void) FormatLocaleString(geometry,MagickPathExtent,
282     "blur:%.20gx%.20g;blur:%.20gx%.20g+90",radius,sigma,radius,sigma);
283   kernel=AcquireKernelInfo(geometry,exception);
284   if (kernel == (KernelInfo *) NULL)
285   {
286     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
287       ResourceLimitWarning,"AcquireKernelInfo failed.",".");
288     return((cl_mem) NULL);
289   }
290   kernelBufferPtr=(float *)AcquireMagickMemory(kernel->width*
291     sizeof(*kernelBufferPtr));
292   if (kernelBufferPtr == (float *) NULL)
293     {
294       kernel=DestroyKernelInfo(kernel);
295       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
296         ResourceLimitWarning,"MemoryAllocationFailed.",".");
297       return((cl_mem) NULL);
298     }
299   for (i = 0; i < (ssize_t) kernel->width; i++)
300     kernelBufferPtr[i] = (float)kernel->values[i];
301   imageKernelBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
302     CL_MEM_READ_ONLY,kernel->width*sizeof(*kernelBufferPtr),kernelBufferPtr);
303   *width=(cl_uint) kernel->width;
304   kernelBufferPtr=(float *) RelinquishMagickMemory(kernelBufferPtr);
305   kernel=DestroyKernelInfo(kernel);
306   if (imageKernelBuffer == (cl_mem) NULL)
307     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
308       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
309   return(imageKernelBuffer);
310 }
311 
LaunchHistogramKernel(MagickCLEnv clEnv,MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,cl_mem histogramBuffer,Image * image,const ChannelType channel,ExceptionInfo * exception)312 static MagickBooleanType LaunchHistogramKernel(MagickCLEnv clEnv,
313   MagickCLDevice device,cl_command_queue queue,cl_mem imageBuffer,
314   cl_mem histogramBuffer,Image *image,const ChannelType channel,
315   ExceptionInfo *exception)
316 {
317   MagickBooleanType
318     outputReady;
319 
320   cl_int
321     clStatus;
322 
323   cl_kernel
324     histogramKernel;
325 
326   cl_event
327     event;
328 
329   cl_uint
330     colorspace,
331     method;
332 
333   register ssize_t
334     i;
335 
336   size_t
337     global_work_size[2];
338 
339   histogramKernel = NULL;
340 
341   outputReady = MagickFalse;
342   colorspace = image->colorspace;
343   method = image->intensity;
344 
345   /* get the OpenCL kernel */
346   histogramKernel = AcquireOpenCLKernel(device,"Histogram");
347   if (histogramKernel == NULL)
348   {
349     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
350     goto cleanup;
351   }
352 
353   /* set the kernel arguments */
354   i = 0;
355   clStatus=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
356   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(ChannelType),&channel);
357   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&colorspace);
358   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_uint),&method);
359   clStatus|=clEnv->library->clSetKernelArg(histogramKernel,i++,sizeof(cl_mem),(void *)&histogramBuffer);
360   if (clStatus != CL_SUCCESS)
361   {
362     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
363     goto cleanup;
364   }
365 
366   /* launch the kernel */
367   global_work_size[0] = image->columns;
368   global_work_size[1] = image->rows;
369 
370   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, histogramKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
371 
372   if (clStatus != CL_SUCCESS)
373   {
374     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
375     goto cleanup;
376   }
377   RecordProfileData(device,histogramKernel,event);
378 
379   outputReady = MagickTrue;
380 
381 cleanup:
382 
383   if (histogramKernel!=NULL)
384     ReleaseOpenCLKernel(histogramKernel);
385 
386   return(outputReady);
387 }
388 
389 /*
390 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
391 %                                                                             %
392 %                                                                             %
393 %                                                                             %
394 %     A c c e l e r a t e A d d N o i s e I m a g e                           %
395 %                                                                             %
396 %                                                                             %
397 %                                                                             %
398 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
399 */
400 
ComputeAddNoiseImage(const Image * image,MagickCLEnv clEnv,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)401 static Image *ComputeAddNoiseImage(const Image *image,MagickCLEnv clEnv,
402   const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
403 {
404   cl_command_queue
405     queue;
406 
407   cl_float
408     cl_attenuate;
409 
410   cl_int
411     status;
412 
413   cl_kernel
414     addNoiseKernel;
415 
416   cl_mem
417     filteredImageBuffer,
418     imageBuffer;
419 
420   cl_uint
421     bufferLength,
422     inputPixelCount,
423     number_channels,
424     numRandomNumberPerPixel,
425     pixelsPerWorkitem,
426     seed0,
427     seed1,
428     workItemCount;
429 
430   const unsigned long
431     *s;
432 
433   MagickBooleanType
434     outputReady;
435 
436   MagickCLDevice
437     device;
438 
439   Image
440     *filteredImage;
441 
442   RandomInfo
443     *randomInfo;
444 
445   size_t
446     gsize[1],
447     i,
448     lsize[1],
449     numRandPerChannel;
450 
451   filteredImage=NULL;
452   imageBuffer=NULL;
453   filteredImageBuffer=NULL;
454   addNoiseKernel=NULL;
455   outputReady=MagickFalse;
456 
457   device=RequestOpenCLDevice(clEnv);
458   queue=AcquireOpenCLCommandQueue(device);
459   if (queue == (cl_command_queue) NULL)
460     goto cleanup;
461   filteredImage=cloneImage(image,exception);
462   if (filteredImage == (Image *) NULL)
463     goto cleanup;
464   if (filteredImage->number_channels != image->number_channels)
465     goto cleanup;
466   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
467   if (imageBuffer == (cl_mem) NULL)
468     goto cleanup;
469   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
470   if (filteredImageBuffer == (cl_mem) NULL)
471     goto cleanup;
472 
473   /* find out how many random numbers needed by pixel */
474   numRandPerChannel=0;
475   numRandomNumberPerPixel=0;
476   switch (noise_type)
477   {
478     case UniformNoise:
479     case ImpulseNoise:
480     case LaplacianNoise:
481     case RandomNoise:
482     default:
483       numRandPerChannel=1;
484       break;
485     case GaussianNoise:
486     case MultiplicativeGaussianNoise:
487     case PoissonNoise:
488       numRandPerChannel=2;
489       break;
490   };
491   if (GetPixelRedTraits(image) != UndefinedPixelTrait)
492     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
493   if (GetPixelGreenTraits(image) != UndefinedPixelTrait)
494     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
495   if (GetPixelBlueTraits(image) != UndefinedPixelTrait)
496     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
497   if (GetPixelAlphaTraits(image) != UndefinedPixelTrait)
498     numRandomNumberPerPixel+=(cl_uint) numRandPerChannel;
499 
500   addNoiseKernel=AcquireOpenCLKernel(device,"AddNoise");
501   if (addNoiseKernel == (cl_kernel) NULL)
502   {
503     (void)OpenCLThrowMagickException(device,exception,GetMagickModule(),
504       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
505     goto cleanup;
506   }
507 
508   /* 256 work items per group, 2 groups per CU */
509   workItemCount=device->max_compute_units*2*256;
510   inputPixelCount=(cl_int) (image->columns*image->rows);
511   pixelsPerWorkitem=(inputPixelCount+workItemCount-1)/workItemCount;
512   pixelsPerWorkitem=((pixelsPerWorkitem+3)/4)*4;
513   lsize[0]=256;
514   gsize[0]=workItemCount;
515 
516   randomInfo=AcquireRandomInfo();
517   s=GetRandomInfoSeed(randomInfo);
518   seed0=s[0];
519   (void) GetPseudoRandomValue(randomInfo);
520   seed1=s[0];
521   randomInfo=DestroyRandomInfo(randomInfo);
522 
523   number_channels=(cl_uint) image->number_channels;
524   bufferLength=(cl_uint) (image->columns*image->rows*image->number_channels);
525   cl_attenuate=(cl_float) attenuate;
526 
527   i=0;
528   status =SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
529   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
530   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
531   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&bufferLength);
532   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&pixelsPerWorkitem);
533   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(NoiseType),(void *)&noise_type);
534   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_float),(void *)&cl_attenuate);
535   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed0);
536   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&seed1);
537   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_uint),(void *)&numRandomNumberPerPixel);
538   status|=SetOpenCLKernelArg(addNoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
539   if (status != CL_SUCCESS)
540   {
541     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
542       ResourceLimitWarning,"clSetKernelArg failed.",".");
543     goto cleanup;
544   }
545 
546   outputReady=EnqueueOpenCLKernel(queue,addNoiseKernel,1,(const size_t *) NULL,gsize,
547     lsize,image,filteredImage,MagickFalse,exception);
548 
549 cleanup:
550 
551   if (imageBuffer != (cl_mem) NULL)
552     ReleaseOpenCLMemObject(imageBuffer);
553   if (filteredImageBuffer != (cl_mem) NULL)
554     ReleaseOpenCLMemObject(filteredImageBuffer);
555   if (addNoiseKernel != (cl_kernel) NULL)
556     ReleaseOpenCLKernel(addNoiseKernel);
557   if (queue != (cl_command_queue) NULL)
558     ReleaseOpenCLCommandQueue(device,queue);
559   if (device != (MagickCLDevice) NULL)
560     ReleaseOpenCLDevice(device);
561   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
562     filteredImage=DestroyImage(filteredImage);
563 
564   return(filteredImage);
565 }
566 
AccelerateAddNoiseImage(const Image * image,const NoiseType noise_type,const double attenuate,ExceptionInfo * exception)567 MagickPrivate Image *AccelerateAddNoiseImage(const Image *image,
568   const NoiseType noise_type,const double attenuate,ExceptionInfo *exception)
569 {
570   Image
571     *filteredImage;
572 
573   MagickCLEnv
574     clEnv;
575 
576   assert(image != NULL);
577   assert(exception != (ExceptionInfo *) NULL);
578 
579   if (checkAccelerateCondition(image) == MagickFalse)
580     return((Image *) NULL);
581 
582   clEnv=getOpenCLEnvironment(exception);
583   if (clEnv == (MagickCLEnv) NULL)
584     return((Image *) NULL);
585 
586   filteredImage=ComputeAddNoiseImage(image,clEnv,noise_type,attenuate,
587     exception);
588   return(filteredImage);
589 }
590 
591 /*
592 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
593 %                                                                             %
594 %                                                                             %
595 %                                                                             %
596 %     A c c e l e r a t e B l u r I m a g e                                   %
597 %                                                                             %
598 %                                                                             %
599 %                                                                             %
600 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
601 */
602 
ComputeBlurImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,ExceptionInfo * exception)603 static Image *ComputeBlurImage(const Image* image,MagickCLEnv clEnv,
604   const double radius,const double sigma,ExceptionInfo *exception)
605 {
606   cl_command_queue
607     queue;
608 
609   cl_int
610     status;
611 
612   cl_kernel
613     blurColumnKernel,
614     blurRowKernel;
615 
616   cl_mem
617     filteredImageBuffer,
618     imageBuffer,
619     imageKernelBuffer,
620     tempImageBuffer;
621 
622   cl_uint
623     imageColumns,
624     imageRows,
625     kernelWidth,
626     number_channels;
627 
628   Image
629     *filteredImage;
630 
631   MagickBooleanType
632     outputReady;
633 
634   MagickCLDevice
635     device;
636 
637   MagickSizeType
638     length;
639 
640   size_t
641     chunkSize=256,
642     gsize[2],
643     i,
644     lsize[2];
645 
646   filteredImage=NULL;
647   imageBuffer=NULL;
648   filteredImageBuffer=NULL;
649   tempImageBuffer=NULL;
650   imageKernelBuffer=NULL;
651   blurRowKernel=NULL;
652   blurColumnKernel=NULL;
653   outputReady=MagickFalse;
654 
655   device=RequestOpenCLDevice(clEnv);
656   queue=AcquireOpenCLCommandQueue(device);
657   filteredImage=cloneImage(image,exception);
658   if (filteredImage == (Image *) NULL)
659     goto cleanup;
660   if (filteredImage->number_channels != image->number_channels)
661     goto cleanup;
662   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
663   if (imageBuffer == (cl_mem) NULL)
664     goto cleanup;
665   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
666   if (filteredImageBuffer == (cl_mem) NULL)
667     goto cleanup;
668 
669   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
670     exception);
671   if (imageKernelBuffer == (cl_mem) NULL)
672     goto cleanup;
673 
674   length=image->columns*image->rows;
675   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
676     sizeof(cl_float4),(void *) NULL);
677   if (tempImageBuffer == (cl_mem) NULL)
678     goto cleanup;
679 
680   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
681   if (blurRowKernel == (cl_kernel) NULL)
682   {
683     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
684       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
685     goto cleanup;
686   }
687 
688   number_channels=(cl_uint) image->number_channels;
689   imageColumns=(cl_uint) image->columns;
690   imageRows=(cl_uint) image->rows;
691 
692   i=0;
693   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
694   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
695   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
696   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
697   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
698   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
699   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
700   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
701   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
702   if (status != CL_SUCCESS)
703   {
704     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
705       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
706     goto cleanup;
707   }
708 
709   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
710   gsize[1]=image->rows;
711   lsize[0]=chunkSize;
712   lsize[1]=1;
713 
714   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,(size_t *) NULL,gsize,
715     lsize,image,filteredImage,MagickFalse,exception);
716   if (outputReady == MagickFalse)
717     goto cleanup;
718 
719   blurColumnKernel=AcquireOpenCLKernel(device,"BlurColumn");
720   if (blurColumnKernel == (cl_kernel) NULL)
721   {
722     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
723       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
724     goto cleanup;
725   }
726 
727   i=0;
728   status =SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
729   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),&number_channels);
730   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
731   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
732   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
733   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
734   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
735   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
736   status|=SetOpenCLKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
737   if (status != CL_SUCCESS)
738   {
739     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
740       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
741     goto cleanup;
742   }
743 
744   gsize[0]=image->columns;
745   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
746   lsize[0]=1;
747   lsize[1]=chunkSize;
748 
749   outputReady=EnqueueOpenCLKernel(queue,blurColumnKernel,2,(size_t *) NULL,gsize,
750     lsize,image,filteredImage,MagickFalse,exception);
751 
752 cleanup:
753 
754   if (imageBuffer != (cl_mem) NULL)
755     ReleaseOpenCLMemObject(imageBuffer);
756   if (filteredImageBuffer != (cl_mem) NULL)
757     ReleaseOpenCLMemObject(filteredImageBuffer);
758   if (tempImageBuffer != (cl_mem) NULL)
759     ReleaseOpenCLMemObject(tempImageBuffer);
760   if (imageKernelBuffer != (cl_mem) NULL)
761     ReleaseOpenCLMemObject(imageKernelBuffer);
762   if (blurRowKernel != (cl_kernel) NULL)
763     ReleaseOpenCLKernel(blurRowKernel);
764   if (blurColumnKernel != (cl_kernel) NULL)
765     ReleaseOpenCLKernel(blurColumnKernel);
766   if (queue != (cl_command_queue) NULL)
767     ReleaseOpenCLCommandQueue(device,queue);
768   if (device != (MagickCLDevice) NULL)
769     ReleaseOpenCLDevice(device);
770   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
771     filteredImage=DestroyImage(filteredImage);
772 
773   return(filteredImage);
774 }
775 
AccelerateBlurImage(const Image * image,const double radius,const double sigma,ExceptionInfo * exception)776 MagickPrivate Image* AccelerateBlurImage(const Image *image,
777   const double radius,const double sigma,ExceptionInfo *exception)
778 {
779   Image
780     *filteredImage;
781 
782   MagickCLEnv
783     clEnv;
784 
785   assert(image != NULL);
786   assert(exception != (ExceptionInfo *) NULL);
787 
788   if (checkAccelerateCondition(image) == MagickFalse)
789     return((Image *) NULL);
790 
791   clEnv=getOpenCLEnvironment(exception);
792   if (clEnv == (MagickCLEnv) NULL)
793     return((Image *) NULL);
794 
795   filteredImage=ComputeBlurImage(image,clEnv,radius,sigma,exception);
796   return(filteredImage);
797 }
798 
799 /*
800 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
801 %                                                                             %
802 %                                                                             %
803 %                                                                             %
804 %     A c c e l e r a t e C o n t r a s t I m a g e                           %
805 %                                                                             %
806 %                                                                             %
807 %                                                                             %
808 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
809 */
810 
ComputeContrastImage(Image * image,MagickCLEnv clEnv,const MagickBooleanType sharpen,ExceptionInfo * exception)811 static MagickBooleanType ComputeContrastImage(Image *image,MagickCLEnv clEnv,
812   const MagickBooleanType sharpen,ExceptionInfo *exception)
813 {
814   cl_command_queue
815     queue;
816 
817   cl_int
818     status,
819     sign;
820 
821   cl_kernel
822     contrastKernel;
823 
824   cl_mem
825     imageBuffer;
826 
827   cl_uint
828     number_channels;
829 
830   MagickBooleanType
831     outputReady;
832 
833   MagickCLDevice
834     device;
835 
836   size_t
837     gsize[2],
838     i;
839 
840   contrastKernel=NULL;
841   imageBuffer=NULL;
842   outputReady=MagickFalse;
843 
844   device=RequestOpenCLDevice(clEnv);
845   queue=AcquireOpenCLCommandQueue(device);
846   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
847   if (imageBuffer == (cl_mem) NULL)
848     goto cleanup;
849 
850   contrastKernel=AcquireOpenCLKernel(device,"Contrast");
851   if (contrastKernel == (cl_kernel) NULL)
852   {
853     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
854       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
855     goto cleanup;
856   }
857 
858   number_channels=(cl_uint) image->number_channels;
859   sign=sharpen != MagickFalse ? 1 : -1;
860 
861   i=0;
862   status =SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
863   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_uint),&number_channels);
864   status|=SetOpenCLKernelArg(contrastKernel,i++,sizeof(cl_int),&sign);
865   if (status != CL_SUCCESS)
866   {
867     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
868       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
869     goto cleanup;
870   }
871 
872   gsize[0]=image->columns;
873   gsize[1]=image->rows;
874 
875   outputReady=EnqueueOpenCLKernel(queue,contrastKernel,2,(const size_t *) NULL,
876     gsize,(const size_t *) NULL,image,(Image *) NULL,MagickFalse,exception);
877 
878 cleanup:
879 
880   if (imageBuffer != (cl_mem) NULL)
881     ReleaseOpenCLMemObject(imageBuffer);
882   if (contrastKernel != (cl_kernel) NULL)
883     ReleaseOpenCLKernel(contrastKernel);
884   if (queue != (cl_command_queue) NULL)
885     ReleaseOpenCLCommandQueue(device,queue);
886   if (device != (MagickCLDevice) NULL)
887     ReleaseOpenCLDevice(device);
888 
889   return(outputReady);
890 }
891 
AccelerateContrastImage(Image * image,const MagickBooleanType sharpen,ExceptionInfo * exception)892 MagickPrivate MagickBooleanType AccelerateContrastImage(Image *image,
893   const MagickBooleanType sharpen,ExceptionInfo *exception)
894 {
895   MagickBooleanType
896     status;
897 
898   MagickCLEnv
899     clEnv;
900 
901   assert(image != NULL);
902   assert(exception != (ExceptionInfo *) NULL);
903 
904   if (checkAccelerateCondition(image) == MagickFalse)
905     return(MagickFalse);
906 
907   clEnv=getOpenCLEnvironment(exception);
908   if (clEnv == (MagickCLEnv) NULL)
909     return(MagickFalse);
910 
911   status=ComputeContrastImage(image,clEnv,sharpen,exception);
912   return(status);
913 }
914 
915 /*
916 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
917 %                                                                             %
918 %                                                                             %
919 %                                                                             %
920 %     A c c e l e r a t e C o n t r a s t S t r e t c h I m a g e             %
921 %                                                                             %
922 %                                                                             %
923 %                                                                             %
924 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
925 */
926 
ComputeContrastStretchImage(Image * image,MagickCLEnv clEnv,const double black_point,const double white_point,ExceptionInfo * exception)927 static MagickBooleanType ComputeContrastStretchImage(Image *image,
928   MagickCLEnv clEnv,const double black_point,const double white_point,
929   ExceptionInfo *exception)
930 {
931 #define ContrastStretchImageTag  "ContrastStretch/Image"
932 #define MaxRange(color)  ((cl_float) ScaleQuantumToMap((Quantum) (color)))
933 
934   CacheView
935     *image_view;
936 
937   cl_command_queue
938     queue;
939 
940   cl_int
941     clStatus;
942 
943   cl_mem_flags
944     mem_flags;
945 
946   cl_mem
947     histogramBuffer,
948     imageBuffer,
949     stretchMapBuffer;
950 
951   cl_kernel
952     histogramKernel,
953     stretchKernel;
954 
955   cl_event
956     event;
957 
958   cl_uint4
959     *histogram;
960 
961   double
962     intensity;
963 
964   cl_float4
965     black,
966     white;
967 
968   MagickBooleanType
969     outputReady,
970     status;
971 
972   MagickCLDevice
973     device;
974 
975   MagickSizeType
976     length;
977 
978   PixelPacket
979     *stretch_map;
980 
981   register ssize_t
982     i;
983 
984   size_t
985     global_work_size[2];
986 
987   void
988     *hostPtr,
989     *inputPixels;
990 
991   histogram=NULL;
992   stretch_map=NULL;
993   inputPixels = NULL;
994   imageBuffer = NULL;
995   histogramBuffer = NULL;
996   stretchMapBuffer = NULL;
997   histogramKernel = NULL;
998   stretchKernel = NULL;
999   queue = NULL;
1000   outputReady = MagickFalse;
1001 
1002 
1003   assert(image != (Image *) NULL);
1004   assert(image->signature == MagickCoreSignature);
1005   if (image->debug != MagickFalse)
1006     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
1007 
1008   /* exception=(&image->exception); */
1009 
1010   /*
1011     Initialize opencl environment.
1012   */
1013   device = RequestOpenCLDevice(clEnv);
1014   queue = AcquireOpenCLCommandQueue(device);
1015 
1016   /*
1017     Allocate and initialize histogram arrays.
1018   */
1019   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
1020 
1021   if (histogram == (cl_uint4 *) NULL)
1022     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed", image->filename);
1023 
1024   /* reset histogram */
1025   (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
1026 
1027   /*
1028   if (IsGrayImage(image,exception) != MagickFalse)
1029     (void) SetImageColorspace(image,GRAYColorspace);
1030   */
1031 
1032   status=MagickTrue;
1033 
1034 
1035   /*
1036     Form histogram.
1037   */
1038   /* Create and initialize OpenCL buffers. */
1039   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
1040   /* assume this  will get a writable image */
1041   image_view=AcquireAuthenticCacheView(image,exception);
1042   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1043 
1044   if (inputPixels == (void *) NULL)
1045   {
1046     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1047     goto cleanup;
1048   }
1049   /* If the host pointer is aligned to the size of CLPixelPacket,
1050      then use the host buffer directly from the GPU; otherwise,
1051      create a buffer on the GPU and copy the data over */
1052   if (ALIGNED(inputPixels,CLPixelPacket))
1053   {
1054     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1055   }
1056   else
1057   {
1058     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1059   }
1060   /* create a CL buffer from image pixel buffer */
1061   length = image->columns * image->rows;
1062   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1063   if (clStatus != CL_SUCCESS)
1064   {
1065     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1066     goto cleanup;
1067   }
1068 
1069   /* If the host pointer is aligned to the size of cl_uint,
1070      then use the host buffer directly from the GPU; otherwise,
1071      create a buffer on the GPU and copy the data over */
1072   if (ALIGNED(histogram,cl_uint4))
1073   {
1074     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1075     hostPtr = histogram;
1076   }
1077   else
1078   {
1079     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1080     hostPtr = histogram;
1081   }
1082   /* create a CL buffer for histogram  */
1083   length = (MaxMap+1);
1084   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
1085   if (clStatus != CL_SUCCESS)
1086   {
1087     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1088     goto cleanup;
1089   }
1090 
1091   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask,exception);
1092   if (status == MagickFalse)
1093     goto cleanup;
1094 
1095   /* read from the kenel output */
1096   if (ALIGNED(histogram,cl_uint4))
1097   {
1098     length = (MaxMap+1);
1099     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
1100   }
1101   else
1102   {
1103     length = (MaxMap+1);
1104     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
1105   }
1106   if (clStatus != CL_SUCCESS)
1107   {
1108     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1109     goto cleanup;
1110   }
1111 
1112   /* unmap, don't block gpu to use this buffer again.  */
1113   if (ALIGNED(histogram,cl_uint4))
1114   {
1115     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
1116     if (clStatus != CL_SUCCESS)
1117     {
1118       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1119       goto cleanup;
1120     }
1121   }
1122 
1123   /* recreate input buffer later, in case image updated */
1124 #ifdef RECREATEBUFFER
1125   if (imageBuffer!=NULL)
1126     clEnv->library->clReleaseMemObject(imageBuffer);
1127 #endif
1128 
1129   /* CPU stuff */
1130   /*
1131      Find the histogram boundaries by locating the black/white levels.
1132   */
1133   black.x=0.0;
1134   white.x=MaxRange(QuantumRange);
1135   if ((image->channel_mask & RedChannel) != 0)
1136   {
1137     intensity=0.0;
1138     for (i=0; i <= (ssize_t) MaxMap; i++)
1139     {
1140       intensity+=histogram[i].s[2];
1141       if (intensity > black_point)
1142         break;
1143     }
1144     black.x=(cl_float) i;
1145     intensity=0.0;
1146     for (i=(ssize_t) MaxMap; i != 0; i--)
1147     {
1148       intensity+=histogram[i].s[2];
1149       if (intensity > ((double) image->columns*image->rows-white_point))
1150         break;
1151     }
1152     white.x=(cl_float) i;
1153   }
1154   black.y=0.0;
1155   white.y=MaxRange(QuantumRange);
1156   if ((image->channel_mask & GreenChannel) != 0)
1157   {
1158     intensity=0.0;
1159     for (i=0; i <= (ssize_t) MaxMap; i++)
1160     {
1161       intensity+=histogram[i].s[2];
1162       if (intensity > black_point)
1163         break;
1164     }
1165     black.y=(cl_float) i;
1166     intensity=0.0;
1167     for (i=(ssize_t) MaxMap; i != 0; i--)
1168     {
1169       intensity+=histogram[i].s[2];
1170       if (intensity > ((double) image->columns*image->rows-white_point))
1171         break;
1172     }
1173     white.y=(cl_float) i;
1174   }
1175   black.z=0.0;
1176   white.z=MaxRange(QuantumRange);
1177   if ((image->channel_mask & BlueChannel) != 0)
1178   {
1179     intensity=0.0;
1180     for (i=0; i <= (ssize_t) MaxMap; i++)
1181     {
1182       intensity+=histogram[i].s[2];
1183       if (intensity > black_point)
1184         break;
1185     }
1186     black.z=(cl_float) i;
1187     intensity=0.0;
1188     for (i=(ssize_t) MaxMap; i != 0; i--)
1189     {
1190       intensity+=histogram[i].s[2];
1191       if (intensity > ((double) image->columns*image->rows-white_point))
1192         break;
1193     }
1194     white.z=(cl_float) i;
1195   }
1196   black.w=0.0;
1197   white.w=MaxRange(QuantumRange);
1198   if ((image->channel_mask & AlphaChannel) != 0)
1199   {
1200     intensity=0.0;
1201     for (i=0; i <= (ssize_t) MaxMap; i++)
1202     {
1203       intensity+=histogram[i].s[2];
1204       if (intensity > black_point)
1205         break;
1206     }
1207     black.w=(cl_float) i;
1208     intensity=0.0;
1209     for (i=(ssize_t) MaxMap; i != 0; i--)
1210     {
1211       intensity+=histogram[i].s[2];
1212       if (intensity > ((double) image->columns*image->rows-white_point))
1213         break;
1214     }
1215     white.w=(cl_float) i;
1216   }
1217 
1218   stretch_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL,
1219     sizeof(*stretch_map));
1220 
1221   if (stretch_map == (PixelPacket *) NULL)
1222     ThrowBinaryException(ResourceLimitError,"MemoryAllocationFailed",
1223       image->filename);
1224 
1225   /*
1226     Stretch the histogram to create the stretched image mapping.
1227   */
1228   (void) memset(stretch_map,0,(MaxMap+1)*sizeof(*stretch_map));
1229   for (i=0; i <= (ssize_t) MaxMap; i++)
1230   {
1231     if ((image->channel_mask & RedChannel) != 0)
1232     {
1233       if (i < (ssize_t) black.x)
1234         stretch_map[i].red=(Quantum) 0;
1235       else
1236         if (i > (ssize_t) white.x)
1237           stretch_map[i].red=QuantumRange;
1238         else
1239           if (black.x != white.x)
1240             stretch_map[i].red=ScaleMapToQuantum((MagickRealType) (MaxMap*
1241                   (i-black.x)/(white.x-black.x)));
1242     }
1243     if ((image->channel_mask & GreenChannel) != 0)
1244     {
1245       if (i < (ssize_t) black.y)
1246         stretch_map[i].green=0;
1247       else
1248         if (i > (ssize_t) white.y)
1249           stretch_map[i].green=QuantumRange;
1250         else
1251           if (black.y != white.y)
1252             stretch_map[i].green=ScaleMapToQuantum((MagickRealType) (MaxMap*
1253                   (i-black.y)/(white.y-black.y)));
1254     }
1255     if ((image->channel_mask & BlueChannel) != 0)
1256     {
1257       if (i < (ssize_t) black.z)
1258         stretch_map[i].blue=0;
1259       else
1260         if (i > (ssize_t) white.z)
1261           stretch_map[i].blue= QuantumRange;
1262         else
1263           if (black.z != white.z)
1264             stretch_map[i].blue=ScaleMapToQuantum((MagickRealType) (MaxMap*
1265                   (i-black.z)/(white.z-black.z)));
1266     }
1267     if ((image->channel_mask & AlphaChannel) != 0)
1268     {
1269       if (i < (ssize_t) black.w)
1270         stretch_map[i].alpha=0;
1271       else
1272         if (i > (ssize_t) white.w)
1273           stretch_map[i].alpha=QuantumRange;
1274         else
1275           if (black.w != white.w)
1276             stretch_map[i].alpha=ScaleMapToQuantum((MagickRealType) (MaxMap*
1277                   (i-black.w)/(white.w-black.w)));
1278     }
1279   }
1280 
1281   /*
1282     Stretch the image.
1283   */
1284   if (((image->channel_mask & AlphaChannel) != 0) || (((image->channel_mask & IndexChannel) != 0) &&
1285       (image->colorspace == CMYKColorspace)))
1286     image->storage_class=DirectClass;
1287   if (image->storage_class == PseudoClass)
1288   {
1289     /*
1290        Stretch colormap.
1291        */
1292     for (i=0; i < (ssize_t) image->colors; i++)
1293     {
1294       if ((image->channel_mask & RedChannel) != 0)
1295       {
1296         if (black.x != white.x)
1297           image->colormap[i].red=stretch_map[
1298             ScaleQuantumToMap(image->colormap[i].red)].red;
1299       }
1300       if ((image->channel_mask & GreenChannel) != 0)
1301       {
1302         if (black.y != white.y)
1303           image->colormap[i].green=stretch_map[
1304             ScaleQuantumToMap(image->colormap[i].green)].green;
1305       }
1306       if ((image->channel_mask & BlueChannel) != 0)
1307       {
1308         if (black.z != white.z)
1309           image->colormap[i].blue=stretch_map[
1310             ScaleQuantumToMap(image->colormap[i].blue)].blue;
1311       }
1312       if ((image->channel_mask & AlphaChannel) != 0)
1313       {
1314         if (black.w != white.w)
1315           image->colormap[i].alpha=stretch_map[
1316             ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
1317       }
1318     }
1319   }
1320 
1321   /*
1322     Stretch image.
1323   */
1324 
1325 
1326   /* GPU can work on this again, image and equalize map as input
1327     image:        uchar4 (CLPixelPacket)
1328     stretch_map:  uchar4 (PixelPacket)
1329     black, white: float4 (FloatPixelPacket) */
1330 
1331 #ifdef RECREATEBUFFER
1332   /* If the host pointer is aligned to the size of CLPixelPacket,
1333      then use the host buffer directly from the GPU; otherwise,
1334      create a buffer on the GPU and copy the data over */
1335   if (ALIGNED(inputPixels,CLPixelPacket))
1336   {
1337     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
1338   }
1339   else
1340   {
1341     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1342   }
1343   /* create a CL buffer from image pixel buffer */
1344   length = image->columns * image->rows;
1345   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1346   if (clStatus != CL_SUCCESS)
1347   {
1348     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1349     goto cleanup;
1350   }
1351 #endif
1352 
1353   /* Create and initialize OpenCL buffers. */
1354   if (ALIGNED(stretch_map, PixelPacket))
1355   {
1356     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1357     hostPtr = stretch_map;
1358   }
1359   else
1360   {
1361     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
1362     hostPtr = stretch_map;
1363   }
1364   /* create a CL buffer for stretch_map  */
1365   length = (MaxMap+1);
1366   stretchMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
1367   if (clStatus != CL_SUCCESS)
1368   {
1369     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1370     goto cleanup;
1371   }
1372 
1373   /* get the OpenCL kernel */
1374   stretchKernel = AcquireOpenCLKernel(device,"ContrastStretch");
1375   if (stretchKernel == NULL)
1376   {
1377     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1378     goto cleanup;
1379   }
1380 
1381   /* set the kernel arguments */
1382   i = 0;
1383   clStatus=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1384   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(ChannelType),&image->channel_mask);
1385   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_mem),(void *)&stretchMapBuffer);
1386   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&white);
1387   clStatus|=clEnv->library->clSetKernelArg(stretchKernel,i++,sizeof(cl_float4),&black);
1388   if (clStatus != CL_SUCCESS)
1389   {
1390     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1391     goto cleanup;
1392   }
1393 
1394   /* launch the kernel */
1395   global_work_size[0] = image->columns;
1396   global_work_size[1] = image->rows;
1397 
1398   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, stretchKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
1399 
1400   if (clStatus != CL_SUCCESS)
1401   {
1402     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1403     goto cleanup;
1404   }
1405   RecordProfileData(device,stretchKernel,event);
1406 
1407   /* read the data back */
1408   if (ALIGNED(inputPixels,CLPixelPacket))
1409   {
1410     length = image->columns * image->rows;
1411     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1412   }
1413   else
1414   {
1415     length = image->columns * image->rows;
1416     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
1417   }
1418   if (clStatus != CL_SUCCESS)
1419   {
1420     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1421     goto cleanup;
1422   }
1423 
1424   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
1425 
1426 cleanup:
1427 
1428   image_view=DestroyCacheView(image_view);
1429 
1430   if (imageBuffer!=NULL)
1431     clEnv->library->clReleaseMemObject(imageBuffer);
1432 
1433   if (stretchMapBuffer!=NULL)
1434     clEnv->library->clReleaseMemObject(stretchMapBuffer);
1435   if (stretch_map!=NULL)
1436     stretch_map=(PixelPacket *) RelinquishMagickMemory(stretch_map);
1437   if (histogramBuffer!=NULL)
1438     clEnv->library->clReleaseMemObject(histogramBuffer);
1439   if (histogram!=NULL)
1440     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
1441   if (histogramKernel!=NULL)
1442     ReleaseOpenCLKernel(histogramKernel);
1443   if (stretchKernel!=NULL)
1444     ReleaseOpenCLKernel(stretchKernel);
1445   if (queue != NULL)
1446     ReleaseOpenCLCommandQueue(device,queue);
1447   if (device != NULL)
1448     ReleaseOpenCLDevice(device);
1449 
1450   return(outputReady);
1451 }
1452 
AccelerateContrastStretchImage(Image * image,const double black_point,const double white_point,ExceptionInfo * exception)1453 MagickPrivate MagickBooleanType AccelerateContrastStretchImage(
1454   Image *image,const double black_point,const double white_point,
1455   ExceptionInfo *exception)
1456 {
1457   MagickBooleanType
1458     status;
1459 
1460   MagickCLEnv
1461     clEnv;
1462 
1463   assert(image != NULL);
1464   assert(exception != (ExceptionInfo *) NULL);
1465 
1466   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1467       (checkHistogramCondition(image,image->intensity) == MagickFalse))
1468     return(MagickFalse);
1469 
1470   clEnv=getOpenCLEnvironment(exception);
1471   if (clEnv == (MagickCLEnv) NULL)
1472     return(MagickFalse);
1473 
1474   status=ComputeContrastStretchImage(image,clEnv,black_point,white_point,
1475     exception);
1476   return(status);
1477 }
1478 
1479 /*
1480 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1481 %                                                                             %
1482 %                                                                             %
1483 %                                                                             %
1484 %     A c c e l e r a t e C o n v o l v e I m a g e                           %
1485 %                                                                             %
1486 %                                                                             %
1487 %                                                                             %
1488 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1489 */
1490 
ComputeConvolveImage(const Image * image,MagickCLEnv clEnv,const KernelInfo * kernel,ExceptionInfo * exception)1491 static Image *ComputeConvolveImage(const Image* image,MagickCLEnv clEnv,
1492   const KernelInfo *kernel,ExceptionInfo *exception)
1493 {
1494   CacheView
1495     *filteredImage_view,
1496     *image_view;
1497 
1498   cl_command_queue
1499     queue;
1500 
1501   cl_event
1502     event;
1503 
1504   cl_kernel
1505     clkernel;
1506 
1507   cl_int
1508     clStatus;
1509 
1510   cl_mem
1511     convolutionKernel,
1512     filteredImageBuffer,
1513     imageBuffer;
1514 
1515   cl_mem_flags
1516     mem_flags;
1517 
1518   const void
1519     *inputPixels;
1520 
1521   float
1522     *kernelBufferPtr;
1523 
1524   Image
1525     *filteredImage;
1526 
1527   MagickBooleanType
1528     outputReady;
1529 
1530   MagickCLDevice
1531     device;
1532 
1533   MagickSizeType
1534     length;
1535 
1536   size_t
1537     global_work_size[3],
1538     localGroupSize[3],
1539     localMemoryRequirement;
1540 
1541   unsigned
1542     kernelSize;
1543 
1544   unsigned int
1545     filterHeight,
1546     filterWidth,
1547     i,
1548     imageHeight,
1549     imageWidth,
1550     matte;
1551 
1552   void
1553     *filteredPixels,
1554     *hostPtr;
1555 
1556   /* intialize all CL objects to NULL */
1557   imageBuffer = NULL;
1558   filteredImageBuffer = NULL;
1559   convolutionKernel = NULL;
1560   clkernel = NULL;
1561   queue = NULL;
1562 
1563   filteredImage = NULL;
1564   filteredImage_view = NULL;
1565   outputReady = MagickFalse;
1566 
1567   device = RequestOpenCLDevice(clEnv);
1568 
1569   image_view=AcquireAuthenticCacheView(image,exception);
1570   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1571   if (inputPixels == (const void *) NULL)
1572   {
1573     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1574     goto cleanup;
1575   }
1576 
1577   /* Create and initialize OpenCL buffers. */
1578 
1579   /* If the host pointer is aligned to the size of CLPixelPacket,
1580      then use the host buffer directly from the GPU; otherwise,
1581      create a buffer on the GPU and copy the data over */
1582   if (ALIGNED(inputPixels,CLPixelPacket))
1583   {
1584     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1585   }
1586   else
1587   {
1588     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1589   }
1590   /* create a CL buffer from image pixel buffer */
1591   length = image->columns * image->rows;
1592   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1593   if (clStatus != CL_SUCCESS)
1594   {
1595     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1596     goto cleanup;
1597   }
1598 
1599   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1600   assert(filteredImage != NULL);
1601   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1602   {
1603     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1604     goto cleanup;
1605   }
1606   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1607   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1608   if (filteredPixels == (void *) NULL)
1609   {
1610     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1611     goto cleanup;
1612   }
1613 
1614   if (ALIGNED(filteredPixels,CLPixelPacket))
1615   {
1616     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1617     hostPtr = filteredPixels;
1618   }
1619   else
1620   {
1621     mem_flags = CL_MEM_WRITE_ONLY;
1622     hostPtr = NULL;
1623   }
1624   /* create a CL buffer from image pixel buffer */
1625   length = image->columns * image->rows;
1626   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1627   if (clStatus != CL_SUCCESS)
1628   {
1629     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1630     goto cleanup;
1631   }
1632 
1633   kernelSize = (unsigned int) (kernel->width * kernel->height);
1634   convolutionKernel = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, kernelSize * sizeof(float), NULL, &clStatus);
1635   if (clStatus != CL_SUCCESS)
1636   {
1637     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1638     goto cleanup;
1639   }
1640 
1641   queue = AcquireOpenCLCommandQueue(device);
1642 
1643   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, convolutionKernel, CL_TRUE, CL_MAP_WRITE, 0, kernelSize * sizeof(float)
1644           , 0, NULL, NULL, &clStatus);
1645   if (clStatus != CL_SUCCESS)
1646   {
1647     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueMapBuffer failed.",".");
1648     goto cleanup;
1649   }
1650   for (i = 0; i < kernelSize; i++)
1651   {
1652     kernelBufferPtr[i] = (float) kernel->values[i];
1653   }
1654   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, convolutionKernel, kernelBufferPtr, 0, NULL, NULL);
1655   if (clStatus != CL_SUCCESS)
1656   {
1657     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
1658     goto cleanup;
1659   }
1660 
1661   /* Compute the local memory requirement for a 16x16 workgroup.
1662      If it's larger than 16k, reduce the workgroup size to 8x8 */
1663   localGroupSize[0] = 16;
1664   localGroupSize[1] = 16;
1665   localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1666     + kernel->width*kernel->height*sizeof(float);
1667 
1668   if (localMemoryRequirement > device->local_memory_size)
1669   {
1670     localGroupSize[0] = 8;
1671     localGroupSize[1] = 8;
1672     localMemoryRequirement = (localGroupSize[0]+kernel->width-1) * (localGroupSize[1]+kernel->height-1) * sizeof(CLPixelPacket)
1673       + kernel->width*kernel->height*sizeof(float);
1674   }
1675   if (localMemoryRequirement <= device->local_memory_size)
1676   {
1677     /* get the OpenCL kernel */
1678     clkernel = AcquireOpenCLKernel(device,"ConvolveOptimized");
1679     if (clkernel == NULL)
1680     {
1681       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1682       goto cleanup;
1683     }
1684 
1685     /* set the kernel arguments */
1686     i = 0;
1687     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1688     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1689     imageWidth = (unsigned int) image->columns;
1690     imageHeight = (unsigned int) image->rows;
1691     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1692     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1693     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1694     filterWidth = (unsigned int) kernel->width;
1695     filterHeight = (unsigned int) kernel->height;
1696     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1697     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1698     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1699     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1700     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1701     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, (localGroupSize[0] + kernel->width-1)*(localGroupSize[1] + kernel->height-1)*sizeof(CLPixelPacket),NULL);
1702     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++, kernel->width*kernel->height*sizeof(float),NULL);
1703     if (clStatus != CL_SUCCESS)
1704     {
1705       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1706       goto cleanup;
1707     }
1708 
1709     /* pad the global size to a multiple of the local work size dimension */
1710     global_work_size[0] = ((image->columns + localGroupSize[0]  - 1)/localGroupSize[0] ) * localGroupSize[0] ;
1711     global_work_size[1] = ((image->rows + localGroupSize[1] - 1)/localGroupSize[1]) * localGroupSize[1];
1712 
1713     /* launch the kernel */
1714     clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1715     if (clStatus != CL_SUCCESS)
1716     {
1717       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1718       goto cleanup;
1719     }
1720     RecordProfileData(device,clkernel,event);
1721   }
1722   else
1723   {
1724     /* get the OpenCL kernel */
1725     clkernel = AcquireOpenCLKernel(device,"Convolve");
1726     if (clkernel == NULL)
1727     {
1728       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
1729       goto cleanup;
1730     }
1731 
1732     /* set the kernel arguments */
1733     i = 0;
1734     clStatus =clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
1735     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
1736     imageWidth = (unsigned int) image->columns;
1737     imageHeight = (unsigned int) image->rows;
1738     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageWidth);
1739     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&imageHeight);
1740     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(cl_mem),(void *)&convolutionKernel);
1741     filterWidth = (unsigned int) kernel->width;
1742     filterHeight = (unsigned int) kernel->height;
1743     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterWidth);
1744     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&filterHeight);
1745     matte = (image->alpha_trait > CopyPixelTrait)?1:0;
1746     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(unsigned int),(void *)&matte);
1747     clStatus|=clEnv->library->clSetKernelArg(clkernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
1748     if (clStatus != CL_SUCCESS)
1749     {
1750       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
1751       goto cleanup;
1752     }
1753 
1754     localGroupSize[0] = 8;
1755     localGroupSize[1] = 8;
1756     global_work_size[0] = (image->columns + (localGroupSize[0]-1))/localGroupSize[0] * localGroupSize[0];
1757     global_work_size[1] = (image->rows    + (localGroupSize[1]-1))/localGroupSize[1] * localGroupSize[1];
1758 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, clkernel, 2, NULL, global_work_size, localGroupSize, 0, NULL, &event);
1759 
1760     if (clStatus != CL_SUCCESS)
1761     {
1762       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
1763       goto cleanup;
1764     }
1765   }
1766   RecordProfileData(device,clkernel,event);
1767 
1768   if (ALIGNED(filteredPixels,CLPixelPacket))
1769   {
1770     length = image->columns * image->rows;
1771     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
1772   }
1773   else
1774   {
1775     length = image->columns * image->rows;
1776     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
1777   }
1778   if (clStatus != CL_SUCCESS)
1779   {
1780     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
1781     goto cleanup;
1782   }
1783 
1784   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
1785 
1786 cleanup:
1787 
1788   image_view=DestroyCacheView(image_view);
1789   if (filteredImage_view != NULL)
1790     filteredImage_view=DestroyCacheView(filteredImage_view);
1791   if (imageBuffer != NULL)
1792     clEnv->library->clReleaseMemObject(imageBuffer);
1793   if (filteredImageBuffer != NULL)
1794     clEnv->library->clReleaseMemObject(filteredImageBuffer);
1795   if (convolutionKernel != NULL)
1796     clEnv->library->clReleaseMemObject(convolutionKernel);
1797   if (clkernel != NULL)
1798     ReleaseOpenCLKernel(clkernel);
1799   if (queue != NULL)
1800     ReleaseOpenCLCommandQueue(device,queue);
1801   if (device != NULL)
1802     ReleaseOpenCLDevice(device);
1803   if (outputReady == MagickFalse)
1804   {
1805     if (filteredImage != NULL)
1806     {
1807       DestroyImage(filteredImage);
1808       filteredImage = NULL;
1809     }
1810   }
1811 
1812   return(filteredImage);
1813 }
1814 
AccelerateConvolveImage(const Image * image,const KernelInfo * kernel,ExceptionInfo * exception)1815 MagickPrivate Image *AccelerateConvolveImage(const Image *image,
1816   const KernelInfo *kernel,ExceptionInfo *exception)
1817 {
1818   /* Temporary disabled due to access violation
1819 
1820   Image
1821     *filteredImage;
1822 
1823   assert(image != NULL);
1824   assert(kernel != (KernelInfo *) NULL);
1825   assert(exception != (ExceptionInfo *) NULL);
1826   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
1827       (checkOpenCLEnvironment(exception) == MagickFalse))
1828     return((Image *) NULL);
1829 
1830   filteredImage=ComputeConvolveImage(image,kernel,exception);
1831   return(filteredImage);
1832   */
1833   magick_unreferenced(image);
1834   magick_unreferenced(kernel);
1835   magick_unreferenced(exception);
1836   return((Image *)NULL);
1837 }
1838 
1839 /*
1840 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1841 %                                                                             %
1842 %                                                                             %
1843 %                                                                             %
1844 %     A c c e l e r a t e D e s p e c k l e I m a g e                         %
1845 %                                                                             %
1846 %                                                                             %
1847 %                                                                             %
1848 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
1849 */
1850 
ComputeDespeckleImage(const Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)1851 static Image *ComputeDespeckleImage(const Image *image,MagickCLEnv clEnv,
1852   ExceptionInfo*exception)
1853 {
1854   static const int
1855     X[4] = {0, 1, 1,-1},
1856     Y[4] = {1, 0, 1, 1};
1857 
1858   CacheView
1859     *filteredImage_view,
1860     *image_view;
1861 
1862   cl_command_queue
1863     queue;
1864 
1865   cl_int
1866     clStatus;
1867 
1868   cl_kernel
1869     hullPass1,
1870     hullPass2;
1871 
1872   cl_event
1873     event;
1874 
1875   cl_mem_flags
1876     mem_flags;
1877 
1878   cl_mem
1879     filteredImageBuffer,
1880     imageBuffer,
1881     tempImageBuffer[2];
1882 
1883   const void
1884     *inputPixels;
1885 
1886   Image
1887     *filteredImage;
1888 
1889   int
1890     k,
1891     matte;
1892 
1893   MagickBooleanType
1894     outputReady;
1895 
1896   MagickCLDevice
1897     device;
1898 
1899   MagickSizeType
1900     length;
1901 
1902   size_t
1903     global_work_size[2];
1904 
1905   unsigned int
1906     imageHeight,
1907     imageWidth;
1908 
1909   void
1910     *filteredPixels,
1911     *hostPtr;
1912 
1913   outputReady = MagickFalse;
1914   inputPixels = NULL;
1915   filteredImage = NULL;
1916   filteredImage_view = NULL;
1917   filteredPixels = NULL;
1918   imageBuffer = NULL;
1919   filteredImageBuffer = NULL;
1920   hullPass1 = NULL;
1921   hullPass2 = NULL;
1922   queue = NULL;
1923   tempImageBuffer[0] = tempImageBuffer[1] = NULL;
1924 
1925   device = RequestOpenCLDevice(clEnv);
1926   queue = AcquireOpenCLCommandQueue(device);
1927 
1928   image_view=AcquireAuthenticCacheView(image,exception);
1929   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
1930   if (inputPixels == (void *) NULL)
1931   {
1932     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
1933     goto cleanup;
1934   }
1935 
1936   if (ALIGNED(inputPixels,CLPixelPacket))
1937   {
1938     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
1939   }
1940   else
1941   {
1942     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
1943   }
1944   /* create a CL buffer from image pixel buffer */
1945   length = image->columns * image->rows;
1946   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
1947   if (clStatus != CL_SUCCESS)
1948   {
1949     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1950     goto cleanup;
1951   }
1952 
1953   mem_flags = CL_MEM_READ_WRITE;
1954   length = image->columns * image->rows;
1955   for (k = 0; k < 2; k++)
1956   {
1957     tempImageBuffer[k] = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), NULL, &clStatus);
1958     if (clStatus != CL_SUCCESS)
1959     {
1960       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1961       goto cleanup;
1962     }
1963   }
1964 
1965   filteredImage = CloneImage(image,0,0,MagickTrue,exception);
1966   assert(filteredImage != NULL);
1967   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
1968   {
1969     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
1970     goto cleanup;
1971   }
1972   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
1973   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
1974   if (filteredPixels == (void *) NULL)
1975   {
1976     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
1977     goto cleanup;
1978   }
1979 
1980   if (ALIGNED(filteredPixels,CLPixelPacket))
1981   {
1982     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
1983     hostPtr = filteredPixels;
1984   }
1985   else
1986   {
1987     mem_flags = CL_MEM_WRITE_ONLY;
1988     hostPtr = NULL;
1989   }
1990   /* create a CL buffer from image pixel buffer */
1991   length = image->columns * image->rows;
1992   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
1993   if (clStatus != CL_SUCCESS)
1994   {
1995     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
1996     goto cleanup;
1997   }
1998 
1999   hullPass1 = AcquireOpenCLKernel(device,"HullPass1");
2000   hullPass2 = AcquireOpenCLKernel(device,"HullPass2");
2001 
2002   clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)&imageBuffer);
2003   clStatus |=clEnv->library->clSetKernelArg(hullPass1,1,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2004   imageWidth = (unsigned int) image->columns;
2005   clStatus |=clEnv->library->clSetKernelArg(hullPass1,2,sizeof(unsigned int),(void *)&imageWidth);
2006   imageHeight = (unsigned int) image->rows;
2007   clStatus |=clEnv->library->clSetKernelArg(hullPass1,3,sizeof(unsigned int),(void *)&imageHeight);
2008   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2009   clStatus |=clEnv->library->clSetKernelArg(hullPass1,6,sizeof(int),(void *)&matte);
2010   if (clStatus != CL_SUCCESS)
2011   {
2012     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2013     goto cleanup;
2014   }
2015 
2016   clStatus = clEnv->library->clSetKernelArg(hullPass2,0,sizeof(cl_mem),(void *)(tempImageBuffer+1));
2017   clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)tempImageBuffer);
2018   imageWidth = (unsigned int) image->columns;
2019   clStatus |=clEnv->library->clSetKernelArg(hullPass2,2,sizeof(unsigned int),(void *)&imageWidth);
2020   imageHeight = (unsigned int) image->rows;
2021   clStatus |=clEnv->library->clSetKernelArg(hullPass2,3,sizeof(unsigned int),(void *)&imageHeight);
2022   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
2023   clStatus |=clEnv->library->clSetKernelArg(hullPass2,6,sizeof(int),(void *)&matte);
2024   if (clStatus != CL_SUCCESS)
2025   {
2026     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2027     goto cleanup;
2028   }
2029 
2030 
2031   global_work_size[0] = image->columns;
2032   global_work_size[1] = image->rows;
2033 
2034 
2035   for (k = 0; k < 4; k++)
2036   {
2037     cl_int2 offset;
2038     int polarity;
2039 
2040 
2041     offset.s[0] = X[k];
2042     offset.s[1] = Y[k];
2043     polarity = 1;
2044     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2045     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2046     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2047     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2048     if (clStatus != CL_SUCCESS)
2049     {
2050       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2051       goto cleanup;
2052     }
2053     /* launch the kernel */
2054 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2055     if (clStatus != CL_SUCCESS)
2056     {
2057       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2058       goto cleanup;
2059     }
2060     RecordProfileData(device,hullPass1,event);
2061 
2062     /* launch the kernel */
2063 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2064     if (clStatus != CL_SUCCESS)
2065     {
2066       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2067       goto cleanup;
2068     }
2069     RecordProfileData(device,hullPass2,event);
2070 
2071     if (k == 0)
2072       clStatus =clEnv->library->clSetKernelArg(hullPass1,0,sizeof(cl_mem),(void *)(tempImageBuffer));
2073     offset.s[0] = -X[k];
2074     offset.s[1] = -Y[k];
2075     polarity = 1;
2076     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2077     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2078     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2079     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2080     if (clStatus != CL_SUCCESS)
2081     {
2082       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2083       goto cleanup;
2084     }
2085     /* launch the kernel */
2086 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2087     if (clStatus != CL_SUCCESS)
2088     {
2089       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2090       goto cleanup;
2091     }
2092     RecordProfileData(device,hullPass1,event);
2093 
2094     /* launch the kernel */
2095 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2096     if (clStatus != CL_SUCCESS)
2097     {
2098       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2099       goto cleanup;
2100     }
2101     RecordProfileData(device,hullPass2,event);
2102 
2103     offset.s[0] = -X[k];
2104     offset.s[1] = -Y[k];
2105     polarity = -1;
2106     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2107     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2108     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2109     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2110     if (clStatus != CL_SUCCESS)
2111     {
2112       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2113       goto cleanup;
2114     }
2115     /* launch the kernel */
2116 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2117     if (clStatus != CL_SUCCESS)
2118     {
2119       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2120       goto cleanup;
2121     }
2122     RecordProfileData(device,hullPass1,event);
2123 
2124     /* launch the kernel */
2125 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2126     if (clStatus != CL_SUCCESS)
2127     {
2128       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2129       goto cleanup;
2130     }
2131     RecordProfileData(device,hullPass2,event);
2132 
2133     offset.s[0] = X[k];
2134     offset.s[1] = Y[k];
2135     polarity = -1;
2136     clStatus = clEnv->library->clSetKernelArg(hullPass1,4,sizeof(cl_int2),(void *)&offset);
2137     clStatus|= clEnv->library->clSetKernelArg(hullPass1,5,sizeof(int),(void *)&polarity);
2138     clStatus|=clEnv->library->clSetKernelArg(hullPass2,4,sizeof(cl_int2),(void *)&offset);
2139     clStatus|=clEnv->library->clSetKernelArg(hullPass2,5,sizeof(int),(void *)&polarity);
2140 
2141     if (k == 3)
2142       clStatus |=clEnv->library->clSetKernelArg(hullPass2,1,sizeof(cl_mem),(void *)&filteredImageBuffer);
2143 
2144     if (clStatus != CL_SUCCESS)
2145     {
2146       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2147       goto cleanup;
2148     }
2149     /* launch the kernel */
2150 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass1, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2151     if (clStatus != CL_SUCCESS)
2152     {
2153       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2154       goto cleanup;
2155     }
2156     RecordProfileData(device,hullPass1,event);
2157 
2158     /* launch the kernel */
2159 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, hullPass2, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2160     if (clStatus != CL_SUCCESS)
2161     {
2162       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2163       goto cleanup;
2164     }
2165     RecordProfileData(device,hullPass2,event);
2166   }
2167 
2168   if (ALIGNED(filteredPixels,CLPixelPacket))
2169   {
2170     length = image->columns * image->rows;
2171     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2172   }
2173   else
2174   {
2175     length = image->columns * image->rows;
2176     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
2177   }
2178   if (clStatus != CL_SUCCESS)
2179   {
2180     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2181     goto cleanup;
2182   }
2183 
2184   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
2185 
2186 cleanup:
2187 
2188   image_view=DestroyCacheView(image_view);
2189   if (filteredImage_view != NULL)
2190     filteredImage_view=DestroyCacheView(filteredImage_view);
2191 
2192   if (queue != NULL)
2193     ReleaseOpenCLCommandQueue(device,queue);
2194   if (device != NULL)
2195     ReleaseOpenCLDevice(device);
2196   if (imageBuffer!=NULL)
2197     clEnv->library->clReleaseMemObject(imageBuffer);
2198   for (k = 0; k < 2; k++)
2199   {
2200     if (tempImageBuffer[k]!=NULL)
2201       clEnv->library->clReleaseMemObject(tempImageBuffer[k]);
2202   }
2203   if (filteredImageBuffer!=NULL)
2204     clEnv->library->clReleaseMemObject(filteredImageBuffer);
2205   if (hullPass1!=NULL)
2206     ReleaseOpenCLKernel(hullPass1);
2207   if (hullPass2!=NULL)
2208     ReleaseOpenCLKernel(hullPass2);
2209   if (outputReady == MagickFalse && filteredImage != NULL)
2210     filteredImage=DestroyImage(filteredImage);
2211 
2212   return(filteredImage);
2213 }
2214 
AccelerateDespeckleImage(const Image * image,ExceptionInfo * exception)2215 MagickPrivate Image *AccelerateDespeckleImage(const Image* image,
2216   ExceptionInfo* exception)
2217 {
2218   Image
2219     *filteredImage;
2220 
2221   MagickCLEnv
2222     clEnv;
2223 
2224   assert(image != NULL);
2225   assert(exception != (ExceptionInfo *) NULL);
2226 
2227   if (checkAccelerateConditionRGBA(image) == MagickFalse)
2228     return((Image *) NULL);
2229 
2230   clEnv=getOpenCLEnvironment(exception);
2231   if (clEnv == (MagickCLEnv) NULL)
2232     return((Image *) NULL);
2233 
2234   filteredImage=ComputeDespeckleImage(image,clEnv,exception);
2235   return(filteredImage);
2236 }
2237 
2238 /*
2239 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2240 %                                                                             %
2241 %                                                                             %
2242 %                                                                             %
2243 %     A c c e l e r a t e E q u a l i z e I m a g e                           %
2244 %                                                                             %
2245 %                                                                             %
2246 %                                                                             %
2247 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2248 */
2249 
ComputeEqualizeImage(Image * image,MagickCLEnv clEnv,ExceptionInfo * exception)2250 static MagickBooleanType ComputeEqualizeImage(Image *image,MagickCLEnv clEnv,
2251   ExceptionInfo *exception)
2252 {
2253 #define EqualizeImageTag  "Equalize/Image"
2254 
2255   CacheView
2256     *image_view;
2257 
2258   cl_command_queue
2259     queue;
2260 
2261   cl_int
2262     clStatus;
2263 
2264   cl_mem_flags
2265     mem_flags;
2266 
2267   cl_mem
2268     equalizeMapBuffer,
2269     histogramBuffer,
2270     imageBuffer;
2271 
2272   cl_kernel
2273     equalizeKernel,
2274     histogramKernel;
2275 
2276   cl_event
2277     event;
2278 
2279   cl_uint4
2280     *histogram;
2281 
2282   cl_float4
2283     white,
2284     black,
2285     intensity,
2286     *map;
2287 
2288   MagickBooleanType
2289     outputReady,
2290     status;
2291 
2292   MagickCLDevice
2293     device;
2294 
2295   MagickSizeType
2296     length;
2297 
2298   PixelPacket
2299     *equalize_map;
2300 
2301   register ssize_t
2302     i;
2303 
2304   size_t
2305     global_work_size[2];
2306 
2307   void
2308     *hostPtr,
2309     *inputPixels;
2310 
2311   map=NULL;
2312   histogram=NULL;
2313   equalize_map=NULL;
2314   inputPixels = NULL;
2315   imageBuffer = NULL;
2316   histogramBuffer = NULL;
2317   equalizeMapBuffer = NULL;
2318   histogramKernel = NULL;
2319   equalizeKernel = NULL;
2320   queue = NULL;
2321   outputReady = MagickFalse;
2322 
2323   assert(image != (Image *) NULL);
2324   assert(image->signature == MagickCoreSignature);
2325   if (image->debug != MagickFalse)
2326     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
2327 
2328   /*
2329    * initialize opencl env
2330    */
2331   device = RequestOpenCLDevice(clEnv);
2332   queue = AcquireOpenCLCommandQueue(device);
2333 
2334   /*
2335     Allocate and initialize histogram arrays.
2336   */
2337   histogram=(cl_uint4 *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*histogram));
2338   if (histogram == (cl_uint4 *) NULL)
2339       ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2340 
2341   /* reset histogram */
2342   (void) memset(histogram,0,(MaxMap+1)*sizeof(*histogram));
2343 
2344   /* Create and initialize OpenCL buffers. */
2345   /* inputPixels = AcquirePixelCachePixels(image, &length, exception); */
2346   /* assume this  will get a writable image */
2347   image_view=AcquireAuthenticCacheView(image,exception);
2348   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
2349 
2350   if (inputPixels == (void *) NULL)
2351   {
2352     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
2353     goto cleanup;
2354   }
2355   /* If the host pointer is aligned to the size of CLPixelPacket,
2356      then use the host buffer directly from the GPU; otherwise,
2357      create a buffer on the GPU and copy the data over */
2358   if (ALIGNED(inputPixels,CLPixelPacket))
2359   {
2360     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2361   }
2362   else
2363   {
2364     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
2365   }
2366   /* create a CL buffer from image pixel buffer */
2367   length = image->columns * image->rows;
2368   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2369   if (clStatus != CL_SUCCESS)
2370   {
2371     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2372     goto cleanup;
2373   }
2374 
2375   /* If the host pointer is aligned to the size of cl_uint,
2376      then use the host buffer directly from the GPU; otherwise,
2377      create a buffer on the GPU and copy the data over */
2378   if (ALIGNED(histogram,cl_uint4))
2379   {
2380     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2381     hostPtr = histogram;
2382   }
2383   else
2384   {
2385     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2386     hostPtr = histogram;
2387   }
2388   /* create a CL buffer for histogram  */
2389   length = (MaxMap+1);
2390   histogramBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(cl_uint4), hostPtr, &clStatus);
2391   if (clStatus != CL_SUCCESS)
2392   {
2393     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2394     goto cleanup;
2395   }
2396 
2397   status = LaunchHistogramKernel(clEnv, device, queue, imageBuffer, histogramBuffer, image, image->channel_mask, exception);
2398   if (status == MagickFalse)
2399     goto cleanup;
2400 
2401   /* read from the kenel output */
2402   if (ALIGNED(histogram,cl_uint4))
2403   {
2404     length = (MaxMap+1);
2405     clEnv->library->clEnqueueMapBuffer(queue, histogramBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(cl_uint4), 0, NULL, NULL, &clStatus);
2406   }
2407   else
2408   {
2409     length = (MaxMap+1);
2410     clStatus = clEnv->library->clEnqueueReadBuffer(queue, histogramBuffer, CL_TRUE, 0, length * sizeof(cl_uint4), histogram, 0, NULL, NULL);
2411   }
2412   if (clStatus != CL_SUCCESS)
2413   {
2414     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2415     goto cleanup;
2416   }
2417 
2418   /* unmap, don't block gpu to use this buffer again.  */
2419   if (ALIGNED(histogram,cl_uint4))
2420   {
2421     clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, histogramBuffer, histogram, 0, NULL, NULL);
2422     if (clStatus != CL_SUCCESS)
2423     {
2424       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
2425       goto cleanup;
2426     }
2427   }
2428 
2429   /* recreate input buffer later, in case image updated */
2430 #ifdef RECREATEBUFFER
2431   if (imageBuffer!=NULL)
2432     clEnv->library->clReleaseMemObject(imageBuffer);
2433 #endif
2434 
2435   /* CPU stuff */
2436   equalize_map=(PixelPacket *) AcquireQuantumMemory(MaxMap+1UL, sizeof(*equalize_map));
2437   if (equalize_map == (PixelPacket *) NULL)
2438     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2439 
2440   map=(cl_float4 *) AcquireQuantumMemory(MaxMap+1UL,sizeof(*map));
2441   if (map == (cl_float4 *) NULL)
2442     ThrowBinaryException(ResourceLimitWarning,"MemoryAllocationFailed", image->filename);
2443 
2444   /*
2445     Integrate the histogram to get the equalization map.
2446   */
2447   (void) memset(&intensity,0,sizeof(intensity));
2448   for (i=0; i <= (ssize_t) MaxMap; i++)
2449   {
2450     if ((image->channel_mask & SyncChannels) != 0)
2451     {
2452       intensity.x+=histogram[i].s[2];
2453       map[i]=intensity;
2454       continue;
2455     }
2456     if ((image->channel_mask & RedChannel) != 0)
2457       intensity.x+=histogram[i].s[2];
2458     if ((image->channel_mask & GreenChannel) != 0)
2459       intensity.y+=histogram[i].s[1];
2460     if ((image->channel_mask & BlueChannel) != 0)
2461       intensity.z+=histogram[i].s[0];
2462     if ((image->channel_mask & AlphaChannel) != 0)
2463       intensity.w+=histogram[i].s[3];
2464     map[i]=intensity;
2465   }
2466   black=map[0];
2467   white=map[(int) MaxMap];
2468   (void) memset(equalize_map,0,(MaxMap+1)*sizeof(*equalize_map));
2469   for (i=0; i <= (ssize_t) MaxMap; i++)
2470   {
2471     if ((image->channel_mask & SyncChannels) != 0)
2472     {
2473       if (white.x != black.x)
2474         equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2475                 (map[i].x-black.x))/(white.x-black.x)));
2476       continue;
2477     }
2478     if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2479       equalize_map[i].red=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2480               (map[i].x-black.x))/(white.x-black.x)));
2481     if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2482       equalize_map[i].green=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2483               (map[i].y-black.y))/(white.y-black.y)));
2484     if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2485       equalize_map[i].blue=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2486               (map[i].z-black.z))/(white.z-black.z)));
2487     if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2488       equalize_map[i].alpha=ScaleMapToQuantum((MagickRealType) ((MaxMap*
2489               (map[i].w-black.w))/(white.w-black.w)));
2490   }
2491 
2492   if (image->storage_class == PseudoClass)
2493   {
2494     /*
2495        Equalize colormap.
2496        */
2497     for (i=0; i < (ssize_t) image->colors; i++)
2498     {
2499       if ((image->channel_mask & SyncChannels) != 0)
2500       {
2501         if (white.x != black.x)
2502         {
2503           image->colormap[i].red=equalize_map[
2504             ScaleQuantumToMap(image->colormap[i].red)].red;
2505           image->colormap[i].green=equalize_map[
2506             ScaleQuantumToMap(image->colormap[i].green)].red;
2507           image->colormap[i].blue=equalize_map[
2508             ScaleQuantumToMap(image->colormap[i].blue)].red;
2509           image->colormap[i].alpha=equalize_map[
2510             ScaleQuantumToMap(image->colormap[i].alpha)].red;
2511         }
2512         continue;
2513       }
2514       if (((image->channel_mask & RedChannel) != 0) && (white.x != black.x))
2515         image->colormap[i].red=equalize_map[
2516           ScaleQuantumToMap(image->colormap[i].red)].red;
2517       if (((image->channel_mask & GreenChannel) != 0) && (white.y != black.y))
2518         image->colormap[i].green=equalize_map[
2519           ScaleQuantumToMap(image->colormap[i].green)].green;
2520       if (((image->channel_mask & BlueChannel) != 0) && (white.z != black.z))
2521         image->colormap[i].blue=equalize_map[
2522           ScaleQuantumToMap(image->colormap[i].blue)].blue;
2523       if (((image->channel_mask & AlphaChannel) != 0) && (white.w != black.w))
2524         image->colormap[i].alpha=equalize_map[
2525           ScaleQuantumToMap(image->colormap[i].alpha)].alpha;
2526     }
2527   }
2528 
2529   /*
2530     Equalize image.
2531   */
2532 
2533   /* GPU can work on this again, image and equalize map as input
2534     image:        uchar4 (CLPixelPacket)
2535     equalize_map: uchar4 (PixelPacket)
2536     black, white: float4 (FloatPixelPacket) */
2537 
2538 #ifdef RECREATEBUFFER
2539   /* If the host pointer is aligned to the size of CLPixelPacket,
2540      then use the host buffer directly from the GPU; otherwise,
2541      create a buffer on the GPU and copy the data over */
2542   if (ALIGNED(inputPixels,CLPixelPacket))
2543   {
2544     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
2545   }
2546   else
2547   {
2548     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2549   }
2550   /* create a CL buffer from image pixel buffer */
2551   length = image->columns * image->rows;
2552   imageBuffer = clEnv->library->clCreateBuffer(context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
2553   if (clStatus != CL_SUCCESS)
2554   {
2555     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2556     goto cleanup;
2557   }
2558 #endif
2559 
2560   /* Create and initialize OpenCL buffers. */
2561   if (ALIGNED(equalize_map, PixelPacket))
2562   {
2563     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
2564     hostPtr = equalize_map;
2565   }
2566   else
2567   {
2568     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
2569     hostPtr = equalize_map;
2570   }
2571   /* create a CL buffer for eqaulize_map  */
2572   length = (MaxMap+1);
2573   equalizeMapBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(PixelPacket), hostPtr, &clStatus);
2574   if (clStatus != CL_SUCCESS)
2575   {
2576     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
2577     goto cleanup;
2578   }
2579 
2580   /* get the OpenCL kernel */
2581   equalizeKernel = AcquireOpenCLKernel(device,"Equalize");
2582   if (equalizeKernel == NULL)
2583   {
2584     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
2585     goto cleanup;
2586   }
2587 
2588   /* set the kernel arguments */
2589   i = 0;
2590   clStatus=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2591   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(ChannelType),&image->channel_mask);
2592   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_mem),(void *)&equalizeMapBuffer);
2593   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&white);
2594   clStatus|=clEnv->library->clSetKernelArg(equalizeKernel,i++,sizeof(cl_float4),&black);
2595   if (clStatus != CL_SUCCESS)
2596   {
2597     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
2598     goto cleanup;
2599   }
2600 
2601   /* launch the kernel */
2602   global_work_size[0] = image->columns;
2603   global_work_size[1] = image->rows;
2604 
2605   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, equalizeKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
2606 
2607   if (clStatus != CL_SUCCESS)
2608   {
2609     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
2610     goto cleanup;
2611   }
2612   RecordProfileData(device,equalizeKernel,event);
2613 
2614   /* read the data back */
2615   if (ALIGNED(inputPixels,CLPixelPacket))
2616   {
2617     length = image->columns * image->rows;
2618     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
2619   }
2620   else
2621   {
2622     length = image->columns * image->rows;
2623     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
2624   }
2625   if (clStatus != CL_SUCCESS)
2626   {
2627     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
2628     goto cleanup;
2629   }
2630 
2631   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
2632 
2633 cleanup:
2634 
2635   image_view=DestroyCacheView(image_view);
2636 
2637   if (imageBuffer!=NULL)
2638     clEnv->library->clReleaseMemObject(imageBuffer);
2639   if (map!=NULL)
2640     map=(cl_float4 *) RelinquishMagickMemory(map);
2641   if (equalizeMapBuffer!=NULL)
2642     clEnv->library->clReleaseMemObject(equalizeMapBuffer);
2643   if (equalize_map!=NULL)
2644     equalize_map=(PixelPacket *) RelinquishMagickMemory(equalize_map);
2645   if (histogramBuffer!=NULL)
2646     clEnv->library->clReleaseMemObject(histogramBuffer);
2647   if (histogram!=NULL)
2648     histogram=(cl_uint4 *) RelinquishMagickMemory(histogram);
2649   if (histogramKernel!=NULL)
2650     ReleaseOpenCLKernel(histogramKernel);
2651   if (equalizeKernel!=NULL)
2652     ReleaseOpenCLKernel(equalizeKernel);
2653   if (queue != NULL)
2654     ReleaseOpenCLCommandQueue(device, queue);
2655   if (device != NULL)
2656     ReleaseOpenCLDevice(device);
2657 
2658   return(outputReady);
2659 }
2660 
AccelerateEqualizeImage(Image * image,ExceptionInfo * exception)2661 MagickPrivate MagickBooleanType AccelerateEqualizeImage(Image *image,
2662   ExceptionInfo *exception)
2663 {
2664   MagickBooleanType
2665     status;
2666 
2667   MagickCLEnv
2668     clEnv;
2669 
2670   assert(image != NULL);
2671   assert(exception != (ExceptionInfo *) NULL);
2672 
2673   if ((checkAccelerateConditionRGBA(image) == MagickFalse) ||
2674       (checkHistogramCondition(image,image->intensity) == MagickFalse))
2675     return(MagickFalse);
2676 
2677   clEnv=getOpenCLEnvironment(exception);
2678   if (clEnv == (MagickCLEnv) NULL)
2679     return(MagickFalse);
2680 
2681   status=ComputeEqualizeImage(image,clEnv,exception);
2682   return(status);
2683 }
2684 
2685 /*
2686 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2687 %                                                                             %
2688 %                                                                             %
2689 %                                                                             %
2690 %     A c c e l e r a t e F u n c t i o n I m a g e                           %
2691 %                                                                             %
2692 %                                                                             %
2693 %                                                                             %
2694 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2695 */
2696 
ComputeFunctionImage(Image * image,MagickCLEnv clEnv,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2697 static MagickBooleanType ComputeFunctionImage(Image *image,MagickCLEnv clEnv,
2698   const MagickFunction function,const size_t number_parameters,
2699   const double *parameters,ExceptionInfo *exception)
2700 {
2701   cl_command_queue
2702     queue;
2703 
2704   cl_int
2705     status;
2706 
2707   cl_kernel
2708     functionKernel;
2709 
2710   cl_mem
2711     imageBuffer,
2712     parametersBuffer;
2713 
2714   cl_uint
2715     number_params,
2716     number_channels;
2717 
2718   float
2719     *parametersBufferPtr;
2720 
2721   MagickBooleanType
2722     outputReady;
2723 
2724   MagickCLDevice
2725     device;
2726 
2727   size_t
2728     gsize[2],
2729     i;
2730 
2731   outputReady=MagickFalse;
2732 
2733   imageBuffer=NULL;
2734   functionKernel=NULL;
2735   parametersBuffer=NULL;
2736 
2737   device=RequestOpenCLDevice(clEnv);
2738   queue=AcquireOpenCLCommandQueue(device);
2739   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2740   if (imageBuffer == (cl_mem) NULL)
2741     goto cleanup;
2742 
2743   parametersBufferPtr=(float *) AcquireQuantumMemory(number_parameters,
2744     sizeof(float));
2745   if (parametersBufferPtr == (float *) NULL)
2746     goto cleanup;
2747   for (i=0; i<number_parameters; i++)
2748     parametersBufferPtr[i]=(float) parameters[i];
2749   parametersBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
2750     CL_MEM_COPY_HOST_PTR,number_parameters*sizeof(*parametersBufferPtr),
2751     parametersBufferPtr);
2752   parametersBufferPtr=RelinquishMagickMemory(parametersBufferPtr);
2753   if (parametersBuffer == (cl_mem) NULL)
2754   {
2755     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2756       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
2757     goto cleanup;
2758   }
2759 
2760   functionKernel=AcquireOpenCLKernel(device,"ComputeFunction");
2761   if (functionKernel == (cl_kernel) NULL)
2762   {
2763     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2764       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2765     goto cleanup;
2766   }
2767 
2768   number_channels=(cl_uint) image->number_channels;
2769   number_params=(cl_uint) number_parameters;
2770 
2771   i=0;
2772   status =SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2773   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_channels);
2774   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
2775   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(MagickFunction),(void *)&function);
2776   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_uint),(void *)&number_params);
2777   status|=SetOpenCLKernelArg(functionKernel,i++,sizeof(cl_mem),(void *)&parametersBuffer);
2778   if (status != CL_SUCCESS)
2779   {
2780     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2781       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2782     goto cleanup;
2783   }
2784 
2785   gsize[0]=image->columns;
2786   gsize[1]=image->rows;
2787   outputReady=EnqueueOpenCLKernel(queue,functionKernel,2,(const size_t *) NULL,
2788     gsize,(const size_t *) NULL,image,(const Image *) NULL,MagickFalse,
2789     exception);
2790 
2791 cleanup:
2792 
2793   if (imageBuffer != (cl_mem) NULL)
2794     ReleaseOpenCLMemObject(imageBuffer);
2795   if (parametersBuffer != (cl_mem) NULL)
2796     ReleaseOpenCLMemObject(parametersBuffer);
2797   if (functionKernel != (cl_kernel) NULL)
2798     ReleaseOpenCLKernel(functionKernel);
2799   if (queue != (cl_command_queue) NULL)
2800     ReleaseOpenCLCommandQueue(device,queue);
2801   if (device != (MagickCLDevice) NULL)
2802     ReleaseOpenCLDevice(device);
2803   return(outputReady);
2804 }
2805 
AccelerateFunctionImage(Image * image,const MagickFunction function,const size_t number_parameters,const double * parameters,ExceptionInfo * exception)2806 MagickPrivate MagickBooleanType AccelerateFunctionImage(Image *image,
2807   const MagickFunction function,const size_t number_parameters,
2808   const double *parameters,ExceptionInfo *exception)
2809 {
2810   MagickBooleanType
2811     status;
2812 
2813   MagickCLEnv
2814     clEnv;
2815 
2816   assert(image != NULL);
2817   assert(exception != (ExceptionInfo *) NULL);
2818 
2819   if (checkAccelerateCondition(image) == MagickFalse)
2820     return(MagickFalse);
2821 
2822   clEnv=getOpenCLEnvironment(exception);
2823   if (clEnv == (MagickCLEnv) NULL)
2824     return(MagickFalse);
2825 
2826   status=ComputeFunctionImage(image,clEnv,function,number_parameters,
2827     parameters,exception);
2828   return(status);
2829 }
2830 
2831 /*
2832 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2833 %                                                                             %
2834 %                                                                             %
2835 %                                                                             %
2836 %     A c c e l e r a t e G r a y s c a l e I m a g e                         %
2837 %                                                                             %
2838 %                                                                             %
2839 %                                                                             %
2840 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2841 */
2842 
ComputeGrayscaleImage(Image * image,MagickCLEnv clEnv,const PixelIntensityMethod method,ExceptionInfo * exception)2843 static MagickBooleanType ComputeGrayscaleImage(Image *image,MagickCLEnv clEnv,
2844   const PixelIntensityMethod method,ExceptionInfo *exception)
2845 {
2846   cl_command_queue
2847     queue;
2848 
2849   cl_int
2850     status;
2851 
2852   cl_kernel
2853     grayscaleKernel;
2854 
2855   cl_mem
2856     imageBuffer;
2857 
2858   cl_uint
2859     number_channels,
2860     colorspace,
2861     intensityMethod;
2862 
2863   MagickBooleanType
2864     outputReady;
2865 
2866   MagickCLDevice
2867     device;
2868 
2869   size_t
2870     gsize[2],
2871     i;
2872 
2873   outputReady=MagickFalse;
2874   imageBuffer=NULL;
2875   grayscaleKernel=NULL;
2876 
2877   assert(image != (Image *) NULL);
2878   assert(image->signature == MagickCoreSignature);
2879   device=RequestOpenCLDevice(clEnv);
2880   queue=AcquireOpenCLCommandQueue(device);
2881   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
2882   if (imageBuffer == (cl_mem) NULL)
2883     goto cleanup;
2884 
2885   grayscaleKernel=AcquireOpenCLKernel(device,"Grayscale");
2886   if (grayscaleKernel == (cl_kernel) NULL)
2887   {
2888     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2889       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
2890     goto cleanup;
2891   }
2892 
2893   number_channels=(cl_uint) image->number_channels;
2894   intensityMethod=(cl_uint) method;
2895   colorspace=(cl_uint) image->colorspace;
2896 
2897   i=0;
2898   status =SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
2899   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&number_channels);
2900   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&colorspace);
2901   status|=SetOpenCLKernelArg(grayscaleKernel,i++,sizeof(cl_uint),&intensityMethod);
2902   if (status != CL_SUCCESS)
2903   {
2904     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
2905       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
2906     goto cleanup;
2907   }
2908 
2909   gsize[0]=image->columns;
2910   gsize[1]=image->rows;
2911   outputReady=EnqueueOpenCLKernel(queue,grayscaleKernel,2,
2912     (const size_t *) NULL,gsize,(const size_t *) NULL,image,(Image *) NULL,
2913     MagickFalse,exception);
2914 
2915 cleanup:
2916 
2917   if (imageBuffer != (cl_mem) NULL)
2918     ReleaseOpenCLMemObject(imageBuffer);
2919   if (grayscaleKernel != (cl_kernel) NULL)
2920     ReleaseOpenCLKernel(grayscaleKernel);
2921   if (queue != (cl_command_queue) NULL)
2922     ReleaseOpenCLCommandQueue(device,queue);
2923   if (device != (MagickCLDevice) NULL)
2924     ReleaseOpenCLDevice(device);
2925 
2926   return(outputReady);
2927 }
2928 
AccelerateGrayscaleImage(Image * image,const PixelIntensityMethod method,ExceptionInfo * exception)2929 MagickPrivate MagickBooleanType AccelerateGrayscaleImage(Image* image,
2930   const PixelIntensityMethod method,ExceptionInfo *exception)
2931 {
2932   MagickBooleanType
2933     status;
2934 
2935   MagickCLEnv
2936     clEnv;
2937 
2938   assert(image != NULL);
2939   assert(exception != (ExceptionInfo *) NULL);
2940 
2941   if ((checkAccelerateCondition(image) == MagickFalse) ||
2942       (checkPixelIntensity(image,method) == MagickFalse))
2943     return(MagickFalse);
2944 
2945   if (image->number_channels < 3)
2946     return(MagickFalse);
2947 
2948   if ((GetPixelRedTraits(image) == UndefinedPixelTrait) ||
2949       (GetPixelGreenTraits(image) == UndefinedPixelTrait) ||
2950       (GetPixelBlueTraits(image) == UndefinedPixelTrait))
2951     return(MagickFalse);
2952 
2953   clEnv=getOpenCLEnvironment(exception);
2954   if (clEnv == (MagickCLEnv) NULL)
2955     return(MagickFalse);
2956 
2957   status=ComputeGrayscaleImage(image,clEnv,method,exception);
2958   return(status);
2959 }
2960 
2961 /*
2962 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2963 %                                                                             %
2964 %                                                                             %
2965 %                                                                             %
2966 %     A c c e l e r a t e L o c a l C o n t r a s t I m a g e                 %
2967 %                                                                             %
2968 %                                                                             %
2969 %                                                                             %
2970 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
2971 */
2972 
ComputeLocalContrastImage(const Image * image,MagickCLEnv clEnv,const double radius,const double strength,ExceptionInfo * exception)2973 static Image *ComputeLocalContrastImage(const Image *image,MagickCLEnv clEnv,
2974   const double radius,const double strength,ExceptionInfo *exception)
2975 {
2976   CacheView
2977     *filteredImage_view,
2978     *image_view;
2979 
2980   cl_command_queue
2981     queue;
2982 
2983   cl_int
2984     clStatus,
2985     iRadius;
2986 
2987   cl_kernel
2988     blurRowKernel,
2989     blurColumnKernel;
2990 
2991   cl_event
2992     event;
2993 
2994   cl_mem
2995     filteredImageBuffer,
2996     imageBuffer,
2997     imageKernelBuffer,
2998     tempImageBuffer;
2999 
3000   cl_mem_flags
3001     mem_flags;
3002 
3003   const void
3004     *inputPixels;
3005 
3006   Image
3007     *filteredImage;
3008 
3009   MagickBooleanType
3010     outputReady;
3011 
3012   MagickCLDevice
3013     device;
3014 
3015   MagickSizeType
3016     length;
3017 
3018   void
3019     *filteredPixels,
3020     *hostPtr;
3021 
3022   unsigned int
3023     i,
3024     imageColumns,
3025     imageRows,
3026     passes;
3027 
3028   filteredImage = NULL;
3029   filteredImage_view = NULL;
3030   imageBuffer = NULL;
3031   filteredImageBuffer = NULL;
3032   tempImageBuffer = NULL;
3033   imageKernelBuffer = NULL;
3034   blurRowKernel = NULL;
3035   blurColumnKernel = NULL;
3036   queue = NULL;
3037   outputReady = MagickFalse;
3038 
3039   device = RequestOpenCLDevice(clEnv);
3040   queue = AcquireOpenCLCommandQueue(device);
3041 
3042   /* Create and initialize OpenCL buffers. */
3043   {
3044     image_view=AcquireAuthenticCacheView(image,exception);
3045     inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3046     if (inputPixels == (const void *) NULL)
3047     {
3048       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3049       goto cleanup;
3050     }
3051 
3052     /* If the host pointer is aligned to the size of CLPixelPacket,
3053      then use the host buffer directly from the GPU; otherwise,
3054      create a buffer on the GPU and copy the data over */
3055     if (ALIGNED(inputPixels,CLPixelPacket))
3056     {
3057       mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3058     }
3059     else
3060     {
3061       mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3062     }
3063     /* create a CL buffer from image pixel buffer */
3064     length = image->columns * image->rows;
3065     imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3066     if (clStatus != CL_SUCCESS)
3067     {
3068       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3069       goto cleanup;
3070     }
3071   }
3072 
3073   /* create output */
3074   {
3075     filteredImage = CloneImage(image,0,0,MagickTrue,exception);
3076     assert(filteredImage != NULL);
3077     if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3078     {
3079       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "CloneImage failed.", ".");
3080       goto cleanup;
3081     }
3082     filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3083     filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3084     if (filteredPixels == (void *) NULL)
3085     {
3086       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning, "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3087       goto cleanup;
3088     }
3089 
3090     if (ALIGNED(filteredPixels,CLPixelPacket))
3091     {
3092       mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3093       hostPtr = filteredPixels;
3094     }
3095     else
3096     {
3097       mem_flags = CL_MEM_WRITE_ONLY;
3098       hostPtr = NULL;
3099     }
3100 
3101     /* create a CL buffer from image pixel buffer */
3102     length = image->columns * image->rows;
3103     filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3104     if (clStatus != CL_SUCCESS)
3105     {
3106       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3107       goto cleanup;
3108     }
3109   }
3110 
3111   {
3112     /* create temp buffer */
3113     {
3114       length = image->columns * image->rows;
3115       tempImageBuffer = clEnv->library->clCreateBuffer(device->context, CL_MEM_READ_WRITE, length * sizeof(float), NULL, &clStatus);
3116       if (clStatus != CL_SUCCESS)
3117       {
3118         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3119         goto cleanup;
3120       }
3121     }
3122 
3123     /* get the opencl kernel */
3124     {
3125       blurRowKernel = AcquireOpenCLKernel(device,"LocalContrastBlurRow");
3126       if (blurRowKernel == NULL)
3127       {
3128         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3129         goto cleanup;
3130       };
3131 
3132       blurColumnKernel = AcquireOpenCLKernel(device,"LocalContrastBlurApplyColumn");
3133       if (blurColumnKernel == NULL)
3134       {
3135         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3136         goto cleanup;
3137       };
3138     }
3139 
3140     {
3141       imageColumns = (unsigned int) image->columns;
3142       imageRows = (unsigned int) image->rows;
3143       iRadius = (cl_int) (image->rows > image->columns ? image->rows : image->columns) * 0.002f * fabs(radius); /* Normalized radius, 100% gives blur radius of 20% of the largest dimension */
3144 
3145       passes = (((1.0f * imageRows) * imageColumns * iRadius) + 3999999999) / 4000000000.0f;
3146       passes = (passes < 1) ? 1: passes;
3147 
3148       /* set the kernel arguments */
3149       i = 0;
3150       clStatus=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3151       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3152       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3153       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(cl_int),(void *)&iRadius);
3154       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3155       clStatus|=clEnv->library->clSetKernelArg(blurRowKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3156 
3157       if (clStatus != CL_SUCCESS)
3158       {
3159         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3160         goto cleanup;
3161       }
3162     }
3163 
3164     /* launch the kernel */
3165     {
3166       int x;
3167       for (x = 0; x < passes; ++x) {
3168         size_t gsize[2];
3169         size_t wsize[2];
3170         size_t goffset[2];
3171 
3172         gsize[0] = 256;
3173         gsize[1] = (image->rows + passes - 1) / passes;
3174         wsize[0] = 256;
3175         wsize[1] = 1;
3176         goffset[0] = 0;
3177         goffset[1] = x * gsize[1];
3178 
3179         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurRowKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3180         if (clStatus != CL_SUCCESS)
3181         {
3182           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3183           goto cleanup;
3184         }
3185         clEnv->library->clFlush(queue);
3186         RecordProfileData(device,blurRowKernel,event);
3187       }
3188     }
3189 
3190     {
3191       cl_float FStrength = strength;
3192       i = 0;
3193       clStatus=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3194       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
3195       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
3196       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&iRadius);
3197       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(cl_float),(void *)&FStrength);
3198       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageColumns);
3199       clStatus|=clEnv->library->clSetKernelArg(blurColumnKernel,i++,sizeof(unsigned int),(void *)&imageRows);
3200 
3201       if (clStatus != CL_SUCCESS)
3202       {
3203         (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3204         goto cleanup;
3205       }
3206     }
3207 
3208     /* launch the kernel */
3209     {
3210       int x;
3211       for (x = 0; x < passes; ++x) {
3212         size_t gsize[2];
3213         size_t wsize[2];
3214         size_t goffset[2];
3215 
3216         gsize[0] = ((image->columns + 3) / 4) * 4;
3217         gsize[1] = ((((image->rows + 63) / 64) + (passes + 1)) / passes) * 64;
3218         wsize[0] = 4;
3219         wsize[1] = 64;
3220         goffset[0] = 0;
3221         goffset[1] = x * gsize[1];
3222 
3223         clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, blurColumnKernel, 2, goffset, gsize, wsize, 0, NULL, &event);
3224         if (clStatus != CL_SUCCESS)
3225         {
3226           (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3227           goto cleanup;
3228         }
3229         clEnv->library->clFlush(queue);
3230         RecordProfileData(device,blurColumnKernel,event);
3231       }
3232     }
3233   }
3234 
3235   /* get result */
3236   if (ALIGNED(filteredPixels,CLPixelPacket))
3237   {
3238     length = image->columns * image->rows;
3239     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3240   }
3241   else
3242   {
3243     length = image->columns * image->rows;
3244     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3245   }
3246   if (clStatus != CL_SUCCESS)
3247   {
3248     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3249     goto cleanup;
3250   }
3251 
3252   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3253 
3254 cleanup:
3255 
3256   image_view=DestroyCacheView(image_view);
3257   if (filteredImage_view != NULL)
3258     filteredImage_view=DestroyCacheView(filteredImage_view);
3259 
3260   if (imageBuffer!=NULL)
3261     clEnv->library->clReleaseMemObject(imageBuffer);
3262   if (filteredImageBuffer!=NULL)
3263     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3264   if (tempImageBuffer!=NULL)
3265     clEnv->library->clReleaseMemObject(tempImageBuffer);
3266   if (imageKernelBuffer!=NULL)
3267     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3268   if (blurRowKernel!=NULL)
3269     ReleaseOpenCLKernel(blurRowKernel);
3270   if (blurColumnKernel!=NULL)
3271     ReleaseOpenCLKernel(blurColumnKernel);
3272   if (queue != NULL)
3273     ReleaseOpenCLCommandQueue(device, queue);
3274   if (device != NULL)
3275     ReleaseOpenCLDevice(device);
3276   if (outputReady == MagickFalse)
3277   {
3278     if (filteredImage != NULL)
3279     {
3280       DestroyImage(filteredImage);
3281       filteredImage = NULL;
3282     }
3283   }
3284 
3285   return(filteredImage);
3286 }
3287 
AccelerateLocalContrastImage(const Image * image,const double radius,const double strength,ExceptionInfo * exception)3288 MagickPrivate Image *AccelerateLocalContrastImage(const Image *image,
3289   const double radius,const double strength,ExceptionInfo *exception)
3290 {
3291   Image
3292     *filteredImage;
3293 
3294   MagickCLEnv
3295     clEnv;
3296 
3297   assert(image != NULL);
3298   assert(exception != (ExceptionInfo *) NULL);
3299 
3300   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3301     return((Image *) NULL);
3302 
3303   clEnv=getOpenCLEnvironment(exception);
3304   if (clEnv == (MagickCLEnv) NULL)
3305     return((Image *) NULL);
3306 
3307   filteredImage=ComputeLocalContrastImage(image,clEnv,radius,strength,
3308     exception);
3309   return(filteredImage);
3310 }
3311 
3312 /*
3313 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3314 %                                                                             %
3315 %                                                                             %
3316 %                                                                             %
3317 %     A c c e l e r a t e M o d u l a t e I m a g e                           %
3318 %                                                                             %
3319 %                                                                             %
3320 %                                                                             %
3321 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3322 */
3323 
ComputeModulateImage(Image * image,MagickCLEnv clEnv,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3324 static MagickBooleanType ComputeModulateImage(Image *image,MagickCLEnv clEnv,
3325   const double percent_brightness,const double percent_hue,
3326   const double percent_saturation,const ColorspaceType colorspace,
3327   ExceptionInfo *exception)
3328 {
3329   CacheView
3330     *image_view;
3331 
3332   cl_float
3333     bright,
3334     hue,
3335     saturation;
3336 
3337   cl_command_queue
3338     queue;
3339 
3340   cl_int
3341     color,
3342     clStatus;
3343 
3344   cl_kernel
3345     modulateKernel;
3346 
3347   cl_event
3348     event;
3349 
3350   cl_mem
3351     imageBuffer;
3352 
3353   cl_mem_flags
3354     mem_flags;
3355 
3356   MagickBooleanType
3357     outputReady;
3358 
3359   MagickCLDevice
3360     device;
3361 
3362   MagickSizeType
3363     length;
3364 
3365   register ssize_t
3366     i;
3367 
3368   void
3369     *inputPixels;
3370 
3371   inputPixels = NULL;
3372   imageBuffer = NULL;
3373   modulateKernel = NULL;
3374 
3375   assert(image != (Image *) NULL);
3376   assert(image->signature == MagickCoreSignature);
3377   if (image->debug != MagickFalse)
3378     (void) LogMagickEvent(TraceEvent,GetMagickModule(),"%s",image->filename);
3379 
3380   /*
3381    * initialize opencl env
3382    */
3383   device = RequestOpenCLDevice(clEnv);
3384   queue = AcquireOpenCLCommandQueue(device);
3385 
3386   outputReady = MagickFalse;
3387 
3388   /* Create and initialize OpenCL buffers.
3389    inputPixels = AcquirePixelCachePixels(image, &length, exception);
3390    assume this  will get a writable image
3391    */
3392   image_view=AcquireAuthenticCacheView(image,exception);
3393   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3394   if (inputPixels == (void *) NULL)
3395   {
3396     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),CacheWarning,"UnableToReadPixelCache.","`%s'",image->filename);
3397     goto cleanup;
3398   }
3399 
3400   /* If the host pointer is aligned to the size of CLPixelPacket,
3401    then use the host buffer directly from the GPU; otherwise,
3402    create a buffer on the GPU and copy the data over
3403    */
3404   if (ALIGNED(inputPixels,CLPixelPacket))
3405   {
3406     mem_flags = CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR;
3407   }
3408   else
3409   {
3410     mem_flags = CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR;
3411   }
3412   /* create a CL buffer from image pixel buffer */
3413   length = image->columns * image->rows;
3414   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags, length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3415   if (clStatus != CL_SUCCESS)
3416   {
3417     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clCreateBuffer failed.",".");
3418     goto cleanup;
3419   }
3420 
3421   modulateKernel = AcquireOpenCLKernel(device, "Modulate");
3422   if (modulateKernel == NULL)
3423   {
3424     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "AcquireOpenCLKernel failed.", ".");
3425     goto cleanup;
3426   }
3427 
3428   bright=percent_brightness;
3429   hue=percent_hue;
3430   saturation=percent_saturation;
3431   color=colorspace;
3432 
3433   i = 0;
3434   clStatus=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
3435   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&bright);
3436   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&hue);
3437   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&saturation);
3438   clStatus|=clEnv->library->clSetKernelArg(modulateKernel,i++,sizeof(cl_float),&color);
3439   if (clStatus != CL_SUCCESS)
3440   {
3441     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clSetKernelArg failed.", ".");
3442     goto cleanup;
3443   }
3444 
3445   {
3446     size_t global_work_size[2];
3447     global_work_size[0] = image->columns;
3448     global_work_size[1] = image->rows;
3449     /* launch the kernel */
3450 	clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, modulateKernel, 2, NULL, global_work_size, NULL, 0, NULL, &event);
3451     if (clStatus != CL_SUCCESS)
3452     {
3453       (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3454       goto cleanup;
3455     }
3456     RecordProfileData(device,modulateKernel,event);
3457   }
3458 
3459   if (ALIGNED(inputPixels,CLPixelPacket))
3460   {
3461     length = image->columns * image->rows;
3462     clEnv->library->clEnqueueMapBuffer(queue, imageBuffer, CL_TRUE, CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL, NULL, &clStatus);
3463   }
3464   else
3465   {
3466     length = image->columns * image->rows;
3467     clStatus = clEnv->library->clEnqueueReadBuffer(queue, imageBuffer, CL_TRUE, 0, length * sizeof(CLPixelPacket), inputPixels, 0, NULL, NULL);
3468   }
3469   if (clStatus != CL_SUCCESS)
3470   {
3471     (void) OpenCLThrowMagickException(device,exception, GetMagickModule(), ResourceLimitWarning, "Reading output image from CL buffer failed.", ".");
3472     goto cleanup;
3473   }
3474 
3475   outputReady=SyncCacheViewAuthenticPixels(image_view,exception);
3476 
3477 cleanup:
3478 
3479   image_view=DestroyCacheView(image_view);
3480 
3481   if (imageBuffer!=NULL)
3482     clEnv->library->clReleaseMemObject(imageBuffer);
3483   if (modulateKernel!=NULL)
3484     ReleaseOpenCLKernel(modulateKernel);
3485   if (queue != NULL)
3486     ReleaseOpenCLCommandQueue(device,queue);
3487   if (device != NULL)
3488     ReleaseOpenCLDevice(device);
3489 
3490   return outputReady;
3491 
3492 }
3493 
AccelerateModulateImage(Image * image,const double percent_brightness,const double percent_hue,const double percent_saturation,const ColorspaceType colorspace,ExceptionInfo * exception)3494 MagickPrivate MagickBooleanType AccelerateModulateImage(Image *image,
3495   const double percent_brightness,const double percent_hue,
3496   const double percent_saturation,const ColorspaceType colorspace,
3497   ExceptionInfo *exception)
3498 {
3499   MagickBooleanType
3500     status;
3501 
3502   MagickCLEnv
3503     clEnv;
3504 
3505   assert(image != NULL);
3506   assert(exception != (ExceptionInfo *) NULL);
3507 
3508   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3509     return(MagickFalse);
3510 
3511   if ((colorspace != HSLColorspace) && (colorspace != UndefinedColorspace))
3512     return(MagickFalse);
3513 
3514   clEnv=getOpenCLEnvironment(exception);
3515   if (clEnv == (MagickCLEnv) NULL)
3516     return(MagickFalse);
3517 
3518   status=ComputeModulateImage(image,clEnv,percent_brightness,percent_hue,
3519     percent_saturation,colorspace,exception);
3520   return(status);
3521 }
3522 
3523 /*
3524 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3525 %                                                                             %
3526 %                                                                             %
3527 %                                                                             %
3528 %     A c c e l e r a t e M o t i o n B l u r I m a g e                       %
3529 %                                                                             %
3530 %                                                                             %
3531 %                                                                             %
3532 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3533 */
3534 
ComputeMotionBlurImage(const Image * image,MagickCLEnv clEnv,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3535 static Image* ComputeMotionBlurImage(const Image *image,MagickCLEnv clEnv,
3536   const double *kernel,const size_t width,const OffsetInfo *offset,
3537   ExceptionInfo *exception)
3538 {
3539   CacheView
3540     *filteredImage_view,
3541     *image_view;
3542 
3543   cl_command_queue
3544     queue;
3545 
3546   cl_float4
3547     biasPixel;
3548 
3549   cl_int
3550     clStatus;
3551 
3552   cl_kernel
3553     motionBlurKernel;
3554 
3555   cl_event
3556     event;
3557 
3558   cl_mem
3559     filteredImageBuffer,
3560     imageBuffer,
3561     imageKernelBuffer,
3562     offsetBuffer;
3563 
3564   cl_mem_flags
3565     mem_flags;
3566 
3567   const void
3568     *inputPixels;
3569 
3570   float
3571     *kernelBufferPtr;
3572 
3573   Image
3574     *filteredImage;
3575 
3576   int
3577     *offsetBufferPtr;
3578 
3579   MagickBooleanType
3580     outputReady;
3581 
3582   MagickCLDevice
3583     device;
3584 
3585   PixelInfo
3586     bias;
3587 
3588   MagickSizeType
3589     length;
3590 
3591   size_t
3592     global_work_size[2],
3593     local_work_size[2];
3594 
3595   unsigned int
3596     i,
3597     imageHeight,
3598     imageWidth,
3599     matte;
3600 
3601   void
3602     *filteredPixels,
3603     *hostPtr;
3604 
3605   outputReady = MagickFalse;
3606   filteredImage = NULL;
3607   filteredImage_view = NULL;
3608   imageBuffer = NULL;
3609   filteredImageBuffer = NULL;
3610   imageKernelBuffer = NULL;
3611   motionBlurKernel = NULL;
3612   queue = NULL;
3613 
3614   device = RequestOpenCLDevice(clEnv);
3615 
3616   /* Create and initialize OpenCL buffers. */
3617 
3618   image_view=AcquireAuthenticCacheView(image,exception);
3619   inputPixels=GetCacheViewAuthenticPixels(image_view,0,0,image->columns,image->rows,exception);
3620   if (inputPixels == (const void *) NULL)
3621   {
3622     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3623       "UnableToReadPixelCache.","`%s'",image->filename);
3624     goto cleanup;
3625   }
3626 
3627   /*
3628     If the host pointer is aligned to the size of CLPixelPacket, then use
3629     the host buffer directly from the GPU; otherwise, create a buffer on
3630     the GPU and copy the data over
3631   */
3632   if (ALIGNED(inputPixels,CLPixelPacket))
3633   {
3634     mem_flags = CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR;
3635   }
3636   else
3637   {
3638     mem_flags = CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR;
3639   }
3640   /*
3641     create a CL buffer from image pixel buffer
3642   */
3643   length = image->columns * image->rows;
3644   imageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3645     length * sizeof(CLPixelPacket), (void*)inputPixels, &clStatus);
3646   if (clStatus != CL_SUCCESS)
3647   {
3648     (void) ThrowMagickException(exception, GetMagickModule(),
3649       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3650     goto cleanup;
3651   }
3652 
3653 
3654   filteredImage = CloneImage(image,image->columns,image->rows,
3655     MagickTrue,exception);
3656   assert(filteredImage != NULL);
3657   if (SetImageStorageClass(filteredImage,DirectClass,exception) != MagickTrue)
3658   {
3659     (void) ThrowMagickException(exception, GetMagickModule(),
3660       ResourceLimitError, "CloneImage failed.", ".");
3661     goto cleanup;
3662   }
3663   filteredImage_view=AcquireAuthenticCacheView(filteredImage,exception);
3664   filteredPixels=GetCacheViewAuthenticPixels(filteredImage_view,0,0,filteredImage->columns,filteredImage->rows,exception);
3665   if (filteredPixels == (void *) NULL)
3666   {
3667     (void) ThrowMagickException(exception,GetMagickModule(),CacheError,
3668       "UnableToReadPixelCache.","`%s'",filteredImage->filename);
3669     goto cleanup;
3670   }
3671 
3672   if (ALIGNED(filteredPixels,CLPixelPacket))
3673   {
3674     mem_flags = CL_MEM_WRITE_ONLY|CL_MEM_USE_HOST_PTR;
3675     hostPtr = filteredPixels;
3676   }
3677   else
3678   {
3679     mem_flags = CL_MEM_WRITE_ONLY;
3680     hostPtr = NULL;
3681   }
3682   /*
3683     Create a CL buffer from image pixel buffer.
3684   */
3685   length = image->columns * image->rows;
3686   filteredImageBuffer = clEnv->library->clCreateBuffer(device->context, mem_flags,
3687     length * sizeof(CLPixelPacket), hostPtr, &clStatus);
3688   if (clStatus != CL_SUCCESS)
3689   {
3690     (void) ThrowMagickException(exception, GetMagickModule(),
3691       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3692     goto cleanup;
3693   }
3694 
3695 
3696   imageKernelBuffer = clEnv->library->clCreateBuffer(device->context,
3697     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(float), NULL,
3698     &clStatus);
3699   if (clStatus != CL_SUCCESS)
3700   {
3701     (void) ThrowMagickException(exception, GetMagickModule(),
3702       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3703     goto cleanup;
3704   }
3705 
3706   queue = AcquireOpenCLCommandQueue(device);
3707   kernelBufferPtr = (float*)clEnv->library->clEnqueueMapBuffer(queue, imageKernelBuffer,
3708     CL_TRUE, CL_MAP_WRITE, 0, width * sizeof(float), 0, NULL, NULL, &clStatus);
3709   if (clStatus != CL_SUCCESS)
3710   {
3711     (void) ThrowMagickException(exception, GetMagickModule(),
3712       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3713     goto cleanup;
3714   }
3715   for (i = 0; i < width; i++)
3716   {
3717     kernelBufferPtr[i] = (float) kernel[i];
3718   }
3719   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, imageKernelBuffer, kernelBufferPtr,
3720     0, NULL, NULL);
3721  if (clStatus != CL_SUCCESS)
3722   {
3723     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3724       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3725     goto cleanup;
3726   }
3727 
3728   offsetBuffer = clEnv->library->clCreateBuffer(device->context,
3729     CL_MEM_READ_ONLY|CL_MEM_ALLOC_HOST_PTR, width * sizeof(cl_int2), NULL,
3730     &clStatus);
3731   if (clStatus != CL_SUCCESS)
3732   {
3733     (void) ThrowMagickException(exception, GetMagickModule(),
3734       ResourceLimitError, "clEnv->library->clCreateBuffer failed.",".");
3735     goto cleanup;
3736   }
3737 
3738   offsetBufferPtr = (int*)clEnv->library->clEnqueueMapBuffer(queue, offsetBuffer, CL_TRUE,
3739     CL_MAP_WRITE, 0, width * sizeof(cl_int2), 0, NULL, NULL, &clStatus);
3740   if (clStatus != CL_SUCCESS)
3741   {
3742     (void) ThrowMagickException(exception, GetMagickModule(),
3743       ResourceLimitError, "clEnv->library->clEnqueueMapBuffer failed.",".");
3744     goto cleanup;
3745   }
3746   for (i = 0; i < width; i++)
3747   {
3748     offsetBufferPtr[2*i] = (int)offset[i].x;
3749     offsetBufferPtr[2*i+1] = (int)offset[i].y;
3750   }
3751   clStatus = clEnv->library->clEnqueueUnmapMemObject(queue, offsetBuffer, offsetBufferPtr, 0,
3752     NULL, NULL);
3753  if (clStatus != CL_SUCCESS)
3754   {
3755     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3756       "clEnv->library->clEnqueueUnmapMemObject failed.", ".");
3757     goto cleanup;
3758   }
3759 
3760 
3761   /*
3762     Get the OpenCL kernel
3763   */
3764   motionBlurKernel = AcquireOpenCLKernel(device,"MotionBlur");
3765   if (motionBlurKernel == NULL)
3766   {
3767     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3768       "AcquireOpenCLKernel failed.", ".");
3769     goto cleanup;
3770   }
3771 
3772   /*
3773     Set the kernel arguments.
3774   */
3775   i = 0;
3776   clStatus=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3777     (void *)&imageBuffer);
3778   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3779     (void *)&filteredImageBuffer);
3780   imageWidth = (unsigned int) image->columns;
3781   imageHeight = (unsigned int) image->rows;
3782   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3783     &imageWidth);
3784   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3785     &imageHeight);
3786   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3787     (void *)&imageKernelBuffer);
3788   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int),
3789     &width);
3790   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_mem),
3791     (void *)&offsetBuffer);
3792 
3793   GetPixelInfo(image,&bias);
3794   biasPixel.s[0] = bias.red;
3795   biasPixel.s[1] = bias.green;
3796   biasPixel.s[2] = bias.blue;
3797   biasPixel.s[3] = bias.alpha;
3798   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(cl_float4), &biasPixel);
3799 
3800   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
3801   matte = (image->alpha_trait > CopyPixelTrait)?1:0;
3802   clStatus|=clEnv->library->clSetKernelArg(motionBlurKernel,i++,sizeof(unsigned int), &matte);
3803   if (clStatus != CL_SUCCESS)
3804   {
3805     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3806       "clEnv->library->clSetKernelArg failed.", ".");
3807     goto cleanup;
3808   }
3809 
3810   /*
3811     Launch the kernel.
3812   */
3813   local_work_size[0] = 16;
3814   local_work_size[1] = 16;
3815   global_work_size[0] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3816                                 (unsigned int) image->columns,(unsigned int) local_work_size[0]);
3817   global_work_size[1] = (size_t)padGlobalWorkgroupSizeToLocalWorkgroupSize(
3818                                 (unsigned int) image->rows,(unsigned int) local_work_size[1]);
3819   clStatus = clEnv->library->clEnqueueNDRangeKernel(queue, motionBlurKernel, 2, NULL,
3820 	  global_work_size, local_work_size, 0, NULL, &event);
3821 
3822   if (clStatus != CL_SUCCESS)
3823   {
3824     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3825       "clEnv->library->clEnqueueNDRangeKernel failed.", ".");
3826     goto cleanup;
3827   }
3828   RecordProfileData(device,motionBlurKernel,event);
3829 
3830   if (ALIGNED(filteredPixels,CLPixelPacket))
3831   {
3832     length = image->columns * image->rows;
3833     clEnv->library->clEnqueueMapBuffer(queue, filteredImageBuffer, CL_TRUE,
3834       CL_MAP_READ|CL_MAP_WRITE, 0, length * sizeof(CLPixelPacket), 0, NULL,
3835       NULL, &clStatus);
3836   }
3837   else
3838   {
3839     length = image->columns * image->rows;
3840     clStatus = clEnv->library->clEnqueueReadBuffer(queue, filteredImageBuffer, CL_TRUE, 0,
3841       length * sizeof(CLPixelPacket), filteredPixels, 0, NULL, NULL);
3842   }
3843   if (clStatus != CL_SUCCESS)
3844   {
3845     (void) ThrowMagickException(exception, GetMagickModule(), ModuleFatalError,
3846       "Reading output image from CL buffer failed.", ".");
3847     goto cleanup;
3848   }
3849   outputReady=SyncCacheViewAuthenticPixels(filteredImage_view,exception);
3850 
3851 cleanup:
3852 
3853   image_view=DestroyCacheView(image_view);
3854   if (filteredImage_view != NULL)
3855     filteredImage_view=DestroyCacheView(filteredImage_view);
3856 
3857   if (filteredImageBuffer!=NULL)
3858     clEnv->library->clReleaseMemObject(filteredImageBuffer);
3859   if (imageBuffer!=NULL)
3860     clEnv->library->clReleaseMemObject(imageBuffer);
3861   if (imageKernelBuffer!=NULL)
3862     clEnv->library->clReleaseMemObject(imageKernelBuffer);
3863   if (motionBlurKernel!=NULL)
3864     ReleaseOpenCLKernel(motionBlurKernel);
3865   if (queue != NULL)
3866     ReleaseOpenCLCommandQueue(device,queue);
3867   if (device != NULL)
3868     ReleaseOpenCLDevice(device);
3869   if (outputReady == MagickFalse && filteredImage != NULL)
3870     filteredImage=DestroyImage(filteredImage);
3871 
3872   return(filteredImage);
3873 }
3874 
AccelerateMotionBlurImage(const Image * image,const double * kernel,const size_t width,const OffsetInfo * offset,ExceptionInfo * exception)3875 MagickPrivate Image *AccelerateMotionBlurImage(const Image *image,
3876   const double* kernel,const size_t width,const OffsetInfo *offset,
3877   ExceptionInfo *exception)
3878 {
3879   Image
3880     *filteredImage;
3881 
3882   MagickCLEnv
3883     clEnv;
3884 
3885   assert(image != NULL);
3886   assert(kernel != (double *) NULL);
3887   assert(offset != (OffsetInfo *) NULL);
3888   assert(exception != (ExceptionInfo *) NULL);
3889 
3890   if (checkAccelerateConditionRGBA(image) == MagickFalse)
3891     return((Image *) NULL);
3892 
3893   clEnv=getOpenCLEnvironment(exception);
3894   if (clEnv == (MagickCLEnv) NULL)
3895     return((Image *) NULL);
3896 
3897   filteredImage=ComputeMotionBlurImage(image,clEnv,kernel,width,offset,
3898     exception);
3899   return(filteredImage);
3900 }
3901 
3902 /*
3903 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3904 %                                                                             %
3905 %                                                                             %
3906 %                                                                             %
3907 %     A c c e l e r a t e R e s i z e I m a g e                               %
3908 %                                                                             %
3909 %                                                                             %
3910 %                                                                             %
3911 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3912 */
3913 
resizeHorizontalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float xFactor,ExceptionInfo * exception)3914 static MagickBooleanType resizeHorizontalFilter(MagickCLDevice device,
3915   cl_command_queue queue,const Image *image,Image *filteredImage,
3916   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
3917   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
3918   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
3919   const float xFactor,ExceptionInfo *exception)
3920 {
3921   cl_kernel
3922     horizontalKernel;
3923 
3924   cl_int
3925     status;
3926 
3927   const unsigned int
3928     workgroupSize = 256;
3929 
3930   float
3931     resizeFilterScale,
3932     resizeFilterSupport,
3933     resizeFilterWindowSupport,
3934     resizeFilterBlur,
3935     scale,
3936     support;
3937 
3938   int
3939     cacheRangeStart,
3940     cacheRangeEnd,
3941     numCachedPixels,
3942     resizeFilterType,
3943     resizeWindowType;
3944 
3945   MagickBooleanType
3946     outputReady;
3947 
3948   size_t
3949     gammaAccumulatorLocalMemorySize,
3950     gsize[2],
3951     i,
3952     imageCacheLocalMemorySize,
3953     pixelAccumulatorLocalMemorySize,
3954     lsize[2],
3955     totalLocalMemorySize,
3956     weightAccumulatorLocalMemorySize;
3957 
3958   unsigned int
3959     chunkSize,
3960     pixelPerWorkgroup;
3961 
3962   horizontalKernel=NULL;
3963   outputReady=MagickFalse;
3964 
3965   /*
3966   Apply filter to resize vertically from image to resize image.
3967   */
3968   scale=MAGICK_MAX(1.0/xFactor+MagickEpsilon,1.0);
3969   support=scale*GetResizeFilterSupport(resizeFilter);
3970   if (support < 0.5)
3971   {
3972     /*
3973     Support too small even for nearest neighbour: Reduce to point
3974     sampling.
3975     */
3976     support=(float) 0.5;
3977     scale=1.0;
3978   }
3979   scale=PerceptibleReciprocal(scale);
3980 
3981   if (resizedColumns < workgroupSize)
3982   {
3983     chunkSize=32;
3984     pixelPerWorkgroup=32;
3985   }
3986   else
3987   {
3988     chunkSize=workgroupSize;
3989     pixelPerWorkgroup=workgroupSize;
3990   }
3991 
3992 DisableMSCWarning(4127)
3993   while(1)
3994 RestoreMSCWarning
3995   {
3996     /* calculate the local memory size needed per workgroup */
3997     cacheRangeStart=(int) (((0 + 0.5)/xFactor+MagickEpsilon)-support+0.5);
3998     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/xFactor+
3999       MagickEpsilon)+support+0.5);
4000     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4001     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4002       number_channels;
4003     totalLocalMemorySize=imageCacheLocalMemorySize;
4004 
4005     /* local size for the pixel accumulator */
4006     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4007     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4008 
4009     /* local memory size for the weight accumulator */
4010     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4011     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4012 
4013     /* local memory size for the gamma accumulator */
4014     if ((number_channels == 4) || (number_channels == 2))
4015       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4016     else
4017       gammaAccumulatorLocalMemorySize=sizeof(float);
4018     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4019 
4020     if (totalLocalMemorySize <= device->local_memory_size)
4021       break;
4022     else
4023     {
4024       pixelPerWorkgroup=pixelPerWorkgroup/2;
4025       chunkSize=chunkSize/2;
4026       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4027       {
4028         /* quit, fallback to CPU */
4029         goto cleanup;
4030       }
4031     }
4032   }
4033 
4034   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4035   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4036 
4037   horizontalKernel=AcquireOpenCLKernel(device,"ResizeHorizontalFilter");
4038   if (horizontalKernel == (cl_kernel) NULL)
4039   {
4040     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4041       ResourceLimitWarning,"AcquireOpenCLKernel failed.", ".");
4042     goto cleanup;
4043   }
4044 
4045   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4046   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4047   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4048   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4049 
4050   i=0;
4051   status =SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4052   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4053   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&columns);
4054   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&rows);
4055   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4056   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4057   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4058   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&xFactor);
4059   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4060   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4061   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4062   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4063   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4064   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4065   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4066   status|=SetOpenCLKernelArg(horizontalKernel,i++,imageCacheLocalMemorySize,NULL);
4067   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(int),&numCachedPixels);
4068   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&pixelPerWorkgroup);
4069   status|=SetOpenCLKernelArg(horizontalKernel,i++,sizeof(unsigned int),&chunkSize);
4070   status|=SetOpenCLKernelArg(horizontalKernel,i++,pixelAccumulatorLocalMemorySize,NULL);
4071   status|=SetOpenCLKernelArg(horizontalKernel,i++,weightAccumulatorLocalMemorySize,NULL);
4072   status|=SetOpenCLKernelArg(horizontalKernel,i++,gammaAccumulatorLocalMemorySize,NULL);
4073 
4074   if (status != CL_SUCCESS)
4075   {
4076     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4077       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4078     goto cleanup;
4079   }
4080 
4081   gsize[0]=(resizedColumns+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4082     workgroupSize;
4083   gsize[1]=resizedRows;
4084   lsize[0]=workgroupSize;
4085   lsize[1]=1;
4086   outputReady=EnqueueOpenCLKernel(queue,horizontalKernel,2,
4087     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4088     exception);
4089 
4090 cleanup:
4091 
4092   if (horizontalKernel != (cl_kernel) NULL)
4093     ReleaseOpenCLKernel(horizontalKernel);
4094 
4095   return(outputReady);
4096 }
4097 
resizeVerticalFilter(MagickCLDevice device,cl_command_queue queue,const Image * image,Image * filteredImage,cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,const ResizeFilter * resizeFilter,cl_mem resizeFilterCubicCoefficients,const float yFactor,ExceptionInfo * exception)4098 static MagickBooleanType resizeVerticalFilter(MagickCLDevice device,
4099   cl_command_queue queue,const Image *image,Image * filteredImage,
4100   cl_mem imageBuffer,cl_uint number_channels,cl_uint columns,cl_uint rows,
4101   cl_mem resizedImageBuffer,cl_uint resizedColumns,cl_uint resizedRows,
4102   const ResizeFilter *resizeFilter,cl_mem resizeFilterCubicCoefficients,
4103   const float yFactor,ExceptionInfo *exception)
4104 {
4105   cl_kernel
4106     verticalKernel;
4107 
4108   cl_int
4109     status;
4110 
4111   const unsigned int
4112     workgroupSize = 256;
4113 
4114   float
4115     resizeFilterScale,
4116     resizeFilterSupport,
4117     resizeFilterWindowSupport,
4118     resizeFilterBlur,
4119     scale,
4120     support;
4121 
4122   int
4123     cacheRangeStart,
4124     cacheRangeEnd,
4125     numCachedPixels,
4126     resizeFilterType,
4127     resizeWindowType;
4128 
4129   MagickBooleanType
4130     outputReady;
4131 
4132   size_t
4133     gammaAccumulatorLocalMemorySize,
4134     gsize[2],
4135     i,
4136     imageCacheLocalMemorySize,
4137     pixelAccumulatorLocalMemorySize,
4138     lsize[2],
4139     totalLocalMemorySize,
4140     weightAccumulatorLocalMemorySize;
4141 
4142   unsigned int
4143     chunkSize,
4144     pixelPerWorkgroup;
4145 
4146   verticalKernel=NULL;
4147   outputReady=MagickFalse;
4148 
4149   /*
4150   Apply filter to resize vertically from image to resize image.
4151   */
4152   scale=MAGICK_MAX(1.0/yFactor+MagickEpsilon,1.0);
4153   support=scale*GetResizeFilterSupport(resizeFilter);
4154   if (support < 0.5)
4155   {
4156     /*
4157     Support too small even for nearest neighbour: Reduce to point
4158     sampling.
4159     */
4160     support=(float) 0.5;
4161     scale=1.0;
4162   }
4163   scale=PerceptibleReciprocal(scale);
4164 
4165   if (resizedRows < workgroupSize)
4166   {
4167     chunkSize=32;
4168     pixelPerWorkgroup=32;
4169   }
4170   else
4171   {
4172     chunkSize=workgroupSize;
4173     pixelPerWorkgroup=workgroupSize;
4174   }
4175 
4176 DisableMSCWarning(4127)
4177   while(1)
4178 RestoreMSCWarning
4179   {
4180     /* calculate the local memory size needed per workgroup */
4181     cacheRangeStart=(int) (((0 + 0.5)/yFactor+MagickEpsilon)-support+0.5);
4182     cacheRangeEnd=(int) ((((pixelPerWorkgroup-1) + 0.5)/yFactor+
4183       MagickEpsilon)+support+0.5);
4184     numCachedPixels=cacheRangeEnd-cacheRangeStart+1;
4185     imageCacheLocalMemorySize=numCachedPixels*sizeof(CLQuantum)*
4186       number_channels;
4187     totalLocalMemorySize=imageCacheLocalMemorySize;
4188 
4189     /* local size for the pixel accumulator */
4190     pixelAccumulatorLocalMemorySize=chunkSize*sizeof(cl_float4);
4191     totalLocalMemorySize+=pixelAccumulatorLocalMemorySize;
4192 
4193     /* local memory size for the weight accumulator */
4194     weightAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4195     totalLocalMemorySize+=weightAccumulatorLocalMemorySize;
4196 
4197     /* local memory size for the gamma accumulator */
4198     if ((number_channels == 4) || (number_channels == 2))
4199       gammaAccumulatorLocalMemorySize=chunkSize*sizeof(float);
4200     else
4201       gammaAccumulatorLocalMemorySize=sizeof(float);
4202     totalLocalMemorySize+=gammaAccumulatorLocalMemorySize;
4203 
4204     if (totalLocalMemorySize <= device->local_memory_size)
4205       break;
4206     else
4207     {
4208       pixelPerWorkgroup=pixelPerWorkgroup/2;
4209       chunkSize=chunkSize/2;
4210       if ((pixelPerWorkgroup == 0) || (chunkSize == 0))
4211       {
4212         /* quit, fallback to CPU */
4213         goto cleanup;
4214       }
4215     }
4216   }
4217 
4218   resizeFilterType=(int)GetResizeFilterWeightingType(resizeFilter);
4219   resizeWindowType=(int)GetResizeFilterWindowWeightingType(resizeFilter);
4220 
4221   verticalKernel=AcquireOpenCLKernel(device,"ResizeVerticalFilter");
4222   if (verticalKernel == (cl_kernel) NULL)
4223   {
4224     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4225       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4226     goto cleanup;
4227   }
4228 
4229   resizeFilterScale=(float) GetResizeFilterScale(resizeFilter);
4230   resizeFilterSupport=(float) GetResizeFilterSupport(resizeFilter);
4231   resizeFilterBlur=(float) GetResizeFilterBlur(resizeFilter);
4232   resizeFilterWindowSupport=(float) GetResizeFilterWindowSupport(resizeFilter);
4233 
4234   i=0;
4235   status =SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&imageBuffer);
4236   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&number_channels);
4237   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&columns);
4238   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&rows);
4239   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizedImageBuffer);
4240   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedColumns);
4241   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_uint),(void*)&resizedRows);
4242   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&yFactor);
4243   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeFilterType);
4244   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int),(void*)&resizeWindowType);
4245   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(cl_mem),(void*)&resizeFilterCubicCoefficients);
4246   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterScale);
4247   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterSupport);
4248   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterWindowSupport);
4249   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(float),(void*)&resizeFilterBlur);
4250   status|=SetOpenCLKernelArg(verticalKernel,i++,imageCacheLocalMemorySize, NULL);
4251   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(int), &numCachedPixels);
4252   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &pixelPerWorkgroup);
4253   status|=SetOpenCLKernelArg(verticalKernel,i++,sizeof(unsigned int), &chunkSize);
4254   status|=SetOpenCLKernelArg(verticalKernel,i++,pixelAccumulatorLocalMemorySize, NULL);
4255   status|=SetOpenCLKernelArg(verticalKernel,i++,weightAccumulatorLocalMemorySize, NULL);
4256   status|=SetOpenCLKernelArg(verticalKernel,i++,gammaAccumulatorLocalMemorySize, NULL);
4257 
4258   if (status != CL_SUCCESS)
4259   {
4260     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4261       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4262     goto cleanup;
4263   }
4264 
4265   gsize[0]=resizedColumns;
4266   gsize[1]=(resizedRows+pixelPerWorkgroup-1)/pixelPerWorkgroup*
4267     workgroupSize;
4268   lsize[0]=1;
4269   lsize[1]=workgroupSize;
4270   outputReady=EnqueueOpenCLKernel(queue,verticalKernel,2,(const size_t *) NULL,
4271     gsize,lsize,image,filteredImage,MagickFalse,exception);
4272 
4273 cleanup:
4274 
4275   if (verticalKernel != (cl_kernel) NULL)
4276     ReleaseOpenCLKernel(verticalKernel);
4277 
4278   return(outputReady);
4279 }
4280 
ComputeResizeImage(const Image * image,MagickCLEnv clEnv,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4281 static Image *ComputeResizeImage(const Image* image,MagickCLEnv clEnv,
4282   const size_t resizedColumns,const size_t resizedRows,
4283   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4284 {
4285   cl_command_queue
4286     queue;
4287 
4288   cl_mem
4289     cubicCoefficientsBuffer,
4290     filteredImageBuffer,
4291     imageBuffer,
4292     tempImageBuffer;
4293 
4294   cl_uint
4295     number_channels;
4296 
4297   const double
4298     *resizeFilterCoefficient;
4299 
4300   float
4301     coefficientBuffer[7],
4302     xFactor,
4303     yFactor;
4304 
4305   MagickBooleanType
4306     outputReady;
4307 
4308   MagickCLDevice
4309     device;
4310 
4311   MagickSizeType
4312     length;
4313 
4314   Image
4315     *filteredImage;
4316 
4317   size_t
4318     i;
4319 
4320   filteredImage=NULL;
4321   imageBuffer=NULL;
4322   filteredImageBuffer=NULL;
4323   tempImageBuffer=NULL;
4324   cubicCoefficientsBuffer=NULL;
4325   outputReady=MagickFalse;
4326 
4327   device=RequestOpenCLDevice(clEnv);
4328   queue=AcquireOpenCLCommandQueue(device);
4329   filteredImage=CloneImage(image,resizedColumns,resizedRows,MagickTrue,
4330     exception);
4331   if (filteredImage == (Image *) NULL)
4332     goto cleanup;
4333   if (filteredImage->number_channels != image->number_channels)
4334     goto cleanup;
4335   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4336   if (imageBuffer == (cl_mem) NULL)
4337     goto cleanup;
4338   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4339   if (filteredImageBuffer == (cl_mem) NULL)
4340     goto cleanup;
4341 
4342   resizeFilterCoefficient=GetResizeFilterCoefficient(resizeFilter);
4343   for (i = 0; i < 7; i++)
4344     coefficientBuffer[i]=(float) resizeFilterCoefficient[i];
4345   cubicCoefficientsBuffer=CreateOpenCLBuffer(device,CL_MEM_COPY_HOST_PTR |
4346     CL_MEM_READ_ONLY,sizeof(coefficientBuffer),&coefficientBuffer);
4347   if (cubicCoefficientsBuffer == (cl_mem) NULL)
4348   {
4349     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4350       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4351     goto cleanup;
4352   }
4353 
4354   number_channels=(cl_uint) image->number_channels;
4355   xFactor=(float) resizedColumns/(float) image->columns;
4356   yFactor=(float) resizedRows/(float) image->rows;
4357   if (xFactor > yFactor)
4358   {
4359     length=resizedColumns*image->rows*number_channels;
4360     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4361       sizeof(CLQuantum),(void *) NULL);
4362     if (tempImageBuffer == (cl_mem) NULL)
4363     {
4364       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4365         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4366       goto cleanup;
4367     }
4368 
4369     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4370       imageBuffer,number_channels,(cl_uint) image->columns,
4371       (cl_uint) image->rows,tempImageBuffer,(cl_uint) resizedColumns,
4372       (cl_uint) image->rows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4373       exception);
4374     if (outputReady == MagickFalse)
4375       goto cleanup;
4376 
4377     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4378       tempImageBuffer,number_channels,(cl_uint) resizedColumns,
4379       (cl_uint) image->rows,filteredImageBuffer,(cl_uint) resizedColumns,
4380       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4381       exception);
4382     if (outputReady == MagickFalse)
4383       goto cleanup;
4384   }
4385   else
4386   {
4387     length=image->columns*resizedRows*number_channels;
4388     tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4389       sizeof(CLQuantum),(void *) NULL);
4390     if (tempImageBuffer == (cl_mem) NULL)
4391     {
4392       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4393         ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4394       goto cleanup;
4395     }
4396 
4397     outputReady=resizeVerticalFilter(device,queue,image,filteredImage,
4398       imageBuffer,number_channels,(cl_uint) image->columns,
4399       (cl_int) image->rows,tempImageBuffer,(cl_uint) image->columns,
4400       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,yFactor,
4401       exception);
4402     if (outputReady == MagickFalse)
4403       goto cleanup;
4404 
4405     outputReady=resizeHorizontalFilter(device,queue,image,filteredImage,
4406       tempImageBuffer,number_channels,(cl_uint) image->columns,
4407       (cl_uint) resizedRows,filteredImageBuffer,(cl_uint) resizedColumns,
4408       (cl_uint) resizedRows,resizeFilter,cubicCoefficientsBuffer,xFactor,
4409       exception);
4410     if (outputReady == MagickFalse)
4411       goto cleanup;
4412   }
4413 
4414 cleanup:
4415 
4416   if (imageBuffer != (cl_mem) NULL)
4417     ReleaseOpenCLMemObject(imageBuffer);
4418   if (filteredImageBuffer != (cl_mem) NULL)
4419     ReleaseOpenCLMemObject(filteredImageBuffer);
4420   if (tempImageBuffer != (cl_mem) NULL)
4421     ReleaseOpenCLMemObject(tempImageBuffer);
4422   if (cubicCoefficientsBuffer != (cl_mem) NULL)
4423     ReleaseOpenCLMemObject(cubicCoefficientsBuffer);
4424   if (queue != (cl_command_queue) NULL)
4425     ReleaseOpenCLCommandQueue(device,queue);
4426   if (device != (MagickCLDevice) NULL)
4427     ReleaseOpenCLDevice(device);
4428   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4429     filteredImage=DestroyImage(filteredImage);
4430 
4431   return(filteredImage);
4432 }
4433 
gpuSupportedResizeWeighting(ResizeWeightingFunctionType f)4434 static MagickBooleanType gpuSupportedResizeWeighting(
4435   ResizeWeightingFunctionType f)
4436 {
4437   unsigned int
4438     i;
4439 
4440   for (i = 0; ;i++)
4441   {
4442     if (supportedResizeWeighting[i] == LastWeightingFunction)
4443       break;
4444     if (supportedResizeWeighting[i] == f)
4445       return(MagickTrue);
4446   }
4447   return(MagickFalse);
4448 }
4449 
AccelerateResizeImage(const Image * image,const size_t resizedColumns,const size_t resizedRows,const ResizeFilter * resizeFilter,ExceptionInfo * exception)4450 MagickPrivate Image *AccelerateResizeImage(const Image *image,
4451   const size_t resizedColumns,const size_t resizedRows,
4452   const ResizeFilter *resizeFilter,ExceptionInfo *exception)
4453 {
4454   Image
4455     *filteredImage;
4456 
4457   MagickCLEnv
4458     clEnv;
4459 
4460   assert(image != NULL);
4461   assert(exception != (ExceptionInfo *) NULL);
4462 
4463   if (checkAccelerateCondition(image) == MagickFalse)
4464     return((Image *) NULL);
4465 
4466   if ((gpuSupportedResizeWeighting(GetResizeFilterWeightingType(
4467          resizeFilter)) == MagickFalse) ||
4468       (gpuSupportedResizeWeighting(GetResizeFilterWindowWeightingType(
4469          resizeFilter)) == MagickFalse))
4470     return((Image *) NULL);
4471 
4472   clEnv=getOpenCLEnvironment(exception);
4473   if (clEnv == (MagickCLEnv) NULL)
4474     return((Image *) NULL);
4475 
4476   filteredImage=ComputeResizeImage(image,clEnv,resizedColumns,resizedRows,
4477     resizeFilter,exception);
4478   return(filteredImage);
4479 }
4480 
4481 /*
4482 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4483 %                                                                             %
4484 %                                                                             %
4485 %                                                                             %
4486 %     A c c e l e r a t e R o t a t i o n a l B l u r I m a g e               %
4487 %                                                                             %
4488 %                                                                             %
4489 %                                                                             %
4490 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4491 */
4492 
ComputeRotationalBlurImage(const Image * image,MagickCLEnv clEnv,const double angle,ExceptionInfo * exception)4493 static Image* ComputeRotationalBlurImage(const Image *image,MagickCLEnv clEnv,
4494   const double angle,ExceptionInfo *exception)
4495 {
4496   cl_command_queue
4497     queue;
4498 
4499   cl_float2
4500     blurCenter;
4501 
4502   cl_int
4503     status;
4504 
4505   cl_mem
4506     cosThetaBuffer,
4507     filteredImageBuffer,
4508     imageBuffer,
4509     sinThetaBuffer;
4510 
4511   cl_kernel
4512     rotationalBlurKernel;
4513 
4514   cl_uint
4515     cossin_theta_size,
4516     number_channels;
4517 
4518   float
4519     blurRadius,
4520     *cosThetaPtr,
4521     offset,
4522     *sinThetaPtr,
4523     theta;
4524 
4525   Image
4526     *filteredImage;
4527 
4528   MagickBooleanType
4529     outputReady;
4530 
4531   MagickCLDevice
4532     device;
4533 
4534   size_t
4535     gsize[2],
4536     i;
4537 
4538   filteredImage=NULL;
4539   imageBuffer=NULL;
4540   filteredImageBuffer=NULL;
4541   sinThetaBuffer=NULL;
4542   cosThetaBuffer=NULL;
4543   rotationalBlurKernel=NULL;
4544   outputReady=MagickFalse;
4545 
4546   device=RequestOpenCLDevice(clEnv);
4547   queue=AcquireOpenCLCommandQueue(device);
4548   filteredImage=cloneImage(image,exception);
4549   if (filteredImage == (Image *) NULL)
4550     goto cleanup;
4551   if (filteredImage->number_channels != image->number_channels)
4552     goto cleanup;
4553   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4554   if (imageBuffer == (cl_mem) NULL)
4555     goto cleanup;
4556   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4557   if (filteredImageBuffer == (cl_mem) NULL)
4558     goto cleanup;
4559 
4560   blurCenter.x=(float) (image->columns-1)/2.0;
4561   blurCenter.y=(float) (image->rows-1)/2.0;
4562   blurRadius=hypot(blurCenter.x,blurCenter.y);
4563   cossin_theta_size=(unsigned int) fabs(4.0*DegreesToRadians(angle)*sqrt(
4564     (double) blurRadius)+2UL);
4565 
4566   cosThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4567   if (cosThetaPtr == (float *) NULL)
4568     goto cleanup;
4569   sinThetaPtr=AcquireQuantumMemory(cossin_theta_size,sizeof(float));
4570   if (sinThetaPtr == (float *) NULL)
4571   {
4572     cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4573     goto cleanup;
4574   }
4575 
4576   theta=DegreesToRadians(angle)/(double) (cossin_theta_size-1);
4577   offset=theta*(float) (cossin_theta_size-1)/2.0;
4578   for (i=0; i < (ssize_t) cossin_theta_size; i++)
4579   {
4580     cosThetaPtr[i]=(float)cos((double) (theta*i-offset));
4581     sinThetaPtr[i]=(float)sin((double) (theta*i-offset));
4582   }
4583 
4584   sinThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4585     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),sinThetaPtr);
4586   sinThetaPtr=RelinquishMagickMemory(sinThetaPtr);
4587   cosThetaBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_ONLY |
4588     CL_MEM_COPY_HOST_PTR,cossin_theta_size*sizeof(float),cosThetaPtr);
4589   cosThetaPtr=RelinquishMagickMemory(cosThetaPtr);
4590   if ((sinThetaBuffer == (cl_mem) NULL) || (cosThetaBuffer == (cl_mem) NULL))
4591   {
4592     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4593       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4594     goto cleanup;
4595   }
4596 
4597   rotationalBlurKernel=AcquireOpenCLKernel(device,"RotationalBlur");
4598   if (rotationalBlurKernel == (cl_kernel) NULL)
4599   {
4600     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4601       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4602     goto cleanup;
4603   }
4604 
4605   number_channels=(cl_uint) image->number_channels;
4606 
4607   i=0;
4608   status =SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4609   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint),&number_channels);
4610   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(ChannelType), &image->channel_mask);
4611   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_float2), &blurCenter);
4612   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&cosThetaBuffer);
4613   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&sinThetaBuffer);
4614   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_uint), &cossin_theta_size);
4615   status|=SetOpenCLKernelArg(rotationalBlurKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4616   if (status != CL_SUCCESS)
4617   {
4618     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4619       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4620     goto cleanup;
4621   }
4622 
4623   gsize[0]=image->columns;
4624   gsize[1]=image->rows;
4625   outputReady=EnqueueOpenCLKernel(queue,rotationalBlurKernel,2,
4626     (const size_t *) NULL,gsize,(const size_t *) NULL,image,filteredImage,
4627     MagickFalse,exception);
4628 
4629 cleanup:
4630 
4631   if (imageBuffer != (cl_mem) NULL)
4632     ReleaseOpenCLMemObject(imageBuffer);
4633   if (filteredImageBuffer != (cl_mem) NULL)
4634     ReleaseOpenCLMemObject(filteredImageBuffer);
4635   if (sinThetaBuffer != (cl_mem) NULL)
4636     ReleaseOpenCLMemObject(sinThetaBuffer);
4637   if (cosThetaBuffer != (cl_mem) NULL)
4638     ReleaseOpenCLMemObject(cosThetaBuffer);
4639   if (rotationalBlurKernel != (cl_kernel) NULL)
4640     ReleaseOpenCLKernel(rotationalBlurKernel);
4641   if (queue != (cl_command_queue) NULL)
4642     ReleaseOpenCLCommandQueue(device,queue);
4643   if (device != (MagickCLDevice) NULL)
4644     ReleaseOpenCLDevice(device);
4645   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4646     filteredImage=DestroyImage(filteredImage);
4647 
4648   return(filteredImage);
4649 }
4650 
AccelerateRotationalBlurImage(const Image * image,const double angle,ExceptionInfo * exception)4651 MagickPrivate Image* AccelerateRotationalBlurImage(const Image *image,
4652   const double angle,ExceptionInfo *exception)
4653 {
4654   Image
4655     *filteredImage;
4656 
4657   MagickCLEnv
4658     clEnv;
4659 
4660   assert(image != NULL);
4661   assert(exception != (ExceptionInfo *) NULL);
4662 
4663   if (checkAccelerateCondition(image) == MagickFalse)
4664     return((Image *) NULL);
4665 
4666   clEnv=getOpenCLEnvironment(exception);
4667   if (clEnv == (MagickCLEnv) NULL)
4668     return((Image *) NULL);
4669 
4670   filteredImage=ComputeRotationalBlurImage(image,clEnv,angle,exception);
4671   return filteredImage;
4672 }
4673 
4674 /*
4675 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4676 %                                                                             %
4677 %                                                                             %
4678 %                                                                             %
4679 %     A c c e l e r a t e U n s h a r p M a s k I m a g e                     %
4680 %                                                                             %
4681 %                                                                             %
4682 %                                                                             %
4683 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
4684 */
4685 
ComputeUnsharpMaskImage(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4686 static Image *ComputeUnsharpMaskImage(const Image *image,MagickCLEnv clEnv,
4687   const double radius,const double sigma,const double gain,
4688   const double threshold,ExceptionInfo *exception)
4689 {
4690   cl_command_queue
4691     queue;
4692 
4693   cl_int
4694     status;
4695 
4696   cl_kernel
4697     blurRowKernel,
4698     unsharpMaskBlurColumnKernel;
4699 
4700   cl_mem
4701     filteredImageBuffer,
4702     imageBuffer,
4703     imageKernelBuffer,
4704     tempImageBuffer;
4705 
4706   cl_uint
4707     imageColumns,
4708     imageRows,
4709     kernelWidth,
4710     number_channels;
4711 
4712   float
4713     fGain,
4714     fThreshold;
4715 
4716   Image
4717     *filteredImage;
4718 
4719   int
4720     chunkSize;
4721 
4722   MagickBooleanType
4723     outputReady;
4724 
4725   MagickCLDevice
4726     device;
4727 
4728   MagickSizeType
4729     length;
4730 
4731   size_t
4732     gsize[2],
4733     i,
4734     lsize[2];
4735 
4736   filteredImage=NULL;
4737   imageBuffer=NULL;
4738   filteredImageBuffer=NULL;
4739   tempImageBuffer=NULL;
4740   imageKernelBuffer=NULL;
4741   blurRowKernel=NULL;
4742   unsharpMaskBlurColumnKernel=NULL;
4743   outputReady=MagickFalse;
4744 
4745   device=RequestOpenCLDevice(clEnv);
4746   queue=AcquireOpenCLCommandQueue(device);
4747   filteredImage=cloneImage(image,exception);
4748   if (filteredImage == (Image *) NULL)
4749     goto cleanup;
4750   if (filteredImage->number_channels != image->number_channels)
4751     goto cleanup;
4752   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4753   if (imageBuffer == (cl_mem) NULL)
4754     goto cleanup;
4755   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4756   if (filteredImageBuffer == (cl_mem) NULL)
4757     goto cleanup;
4758 
4759   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4760     exception);
4761 
4762   length=image->columns*image->rows;
4763   tempImageBuffer=CreateOpenCLBuffer(device,CL_MEM_READ_WRITE,length*
4764     sizeof(cl_float4),NULL);
4765   if (tempImageBuffer == (cl_mem) NULL)
4766   {
4767     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4768       ResourceLimitWarning,"CreateOpenCLBuffer failed.",".");
4769     goto cleanup;
4770   }
4771 
4772   blurRowKernel=AcquireOpenCLKernel(device,"BlurRow");
4773   if (blurRowKernel == (cl_kernel) NULL)
4774   {
4775     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4776       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4777     goto cleanup;
4778   }
4779 
4780   unsharpMaskBlurColumnKernel=AcquireOpenCLKernel(device,
4781     "UnsharpMaskBlurColumn");
4782   if (unsharpMaskBlurColumnKernel == (cl_kernel) NULL)
4783   {
4784     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4785       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4786     goto cleanup;
4787   }
4788 
4789   number_channels=(cl_uint) image->number_channels;
4790   imageColumns=(cl_uint) image->columns;
4791   imageRows=(cl_uint) image->rows;
4792 
4793   chunkSize = 256;
4794 
4795   i=0;
4796   status =SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4797   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),&number_channels);
4798   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(ChannelType),&image->channel_mask);
4799   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4800   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4801   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4802   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4803   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_float4)*(chunkSize+kernelWidth),(void *) NULL);
4804   status|=SetOpenCLKernelArg(blurRowKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4805   if (status != CL_SUCCESS)
4806   {
4807     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4808       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4809     goto cleanup;
4810   }
4811 
4812   gsize[0]=chunkSize*((image->columns+chunkSize-1)/chunkSize);
4813   gsize[1]=image->rows;
4814   lsize[0]=chunkSize;
4815   lsize[1]=1;
4816   outputReady=EnqueueOpenCLKernel(queue,blurRowKernel,2,
4817     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4818     exception);
4819 
4820   chunkSize=256;
4821   fGain=(float) gain;
4822   fThreshold=(float) threshold;
4823 
4824   i=0;
4825   status =SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4826   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&tempImageBuffer);
4827   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),&number_channels);
4828   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(ChannelType),&image->channel_mask);
4829   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4830   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4831   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,(chunkSize+kernelWidth-1)*sizeof(cl_float4),NULL);
4832   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,kernelWidth*sizeof(float),NULL);
4833   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4834   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4835   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fGain);
4836   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(float),(void *)&fThreshold);
4837   status|=SetOpenCLKernelArg(unsharpMaskBlurColumnKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4838   if (status != CL_SUCCESS)
4839   {
4840     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4841       ResourceLimitWarning,"clEnv->library->clSetKernelArg failed.",".");
4842     goto cleanup;
4843   }
4844 
4845   gsize[0]=image->columns;
4846   gsize[1]=chunkSize*((image->rows+chunkSize-1)/chunkSize);
4847   lsize[0]=1;
4848   lsize[1]=chunkSize;
4849   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskBlurColumnKernel,2,
4850     (const size_t *) NULL,gsize,lsize,image,filteredImage,MagickFalse,
4851     exception);
4852 
4853 cleanup:
4854 
4855   if (imageBuffer != (cl_mem) NULL)
4856     ReleaseOpenCLMemObject(imageBuffer);
4857   if (filteredImageBuffer != (cl_mem) NULL)
4858     ReleaseOpenCLMemObject(filteredImageBuffer);
4859   if (tempImageBuffer != (cl_mem) NULL)
4860     ReleaseOpenCLMemObject(tempImageBuffer);
4861   if (imageKernelBuffer != (cl_mem) NULL)
4862     ReleaseOpenCLMemObject(imageKernelBuffer);
4863   if (blurRowKernel != (cl_kernel) NULL)
4864     ReleaseOpenCLKernel(blurRowKernel);
4865   if (unsharpMaskBlurColumnKernel != (cl_kernel) NULL)
4866     ReleaseOpenCLKernel(unsharpMaskBlurColumnKernel);
4867   if (queue != (cl_command_queue) NULL)
4868     ReleaseOpenCLCommandQueue(device,queue);
4869   if (device != (MagickCLDevice) NULL)
4870     ReleaseOpenCLDevice(device);
4871   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4872     filteredImage=DestroyImage(filteredImage);
4873 
4874   return(filteredImage);
4875 }
4876 
ComputeUnsharpMaskImageSingle(const Image * image,MagickCLEnv clEnv,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)4877 static Image *ComputeUnsharpMaskImageSingle(const Image *image,
4878   MagickCLEnv clEnv,const double radius,const double sigma,const double gain,
4879   const double threshold,ExceptionInfo *exception)
4880 {
4881   cl_command_queue
4882     queue;
4883 
4884   cl_int
4885     status;
4886 
4887   cl_kernel
4888     unsharpMaskKernel;
4889 
4890   cl_mem
4891     filteredImageBuffer,
4892     imageBuffer,
4893     imageKernelBuffer;
4894 
4895   cl_uint
4896     imageColumns,
4897     imageRows,
4898     kernelWidth,
4899     number_channels;
4900 
4901   float
4902     fGain,
4903     fThreshold;
4904 
4905   Image
4906     *filteredImage;
4907 
4908   MagickBooleanType
4909     outputReady;
4910 
4911   MagickCLDevice
4912     device;
4913 
4914   size_t
4915     gsize[2],
4916     i,
4917     lsize[2];
4918 
4919   filteredImage=NULL;
4920   imageBuffer=NULL;
4921   filteredImageBuffer=NULL;
4922   imageKernelBuffer=NULL;
4923   unsharpMaskKernel=NULL;
4924   outputReady=MagickFalse;
4925 
4926   device=RequestOpenCLDevice(clEnv);
4927   queue=AcquireOpenCLCommandQueue(device);
4928   filteredImage=cloneImage(image,exception);
4929   if (filteredImage == (Image *) NULL)
4930     goto cleanup;
4931   if (filteredImage->number_channels != image->number_channels)
4932     goto cleanup;
4933   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
4934   if (imageBuffer == (cl_mem) NULL)
4935     goto cleanup;
4936   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
4937   if (filteredImageBuffer == (cl_mem) NULL)
4938     goto cleanup;
4939 
4940   imageKernelBuffer=createKernelInfo(device,radius,sigma,&kernelWidth,
4941     exception);
4942 
4943   unsharpMaskKernel=AcquireOpenCLKernel(device,"UnsharpMask");
4944   if (unsharpMaskKernel == NULL)
4945   {
4946     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4947       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
4948     goto cleanup;
4949   }
4950 
4951   imageColumns=(cl_uint) image->columns;
4952   imageRows=(cl_uint) image->rows;
4953   number_channels=(cl_uint) image->number_channels;
4954   fGain=(float) gain;
4955   fThreshold=(float) threshold;
4956 
4957   i=0;
4958   status =SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
4959   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&number_channels);
4960   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(ChannelType),(void *)&image->channel_mask);
4961   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&imageKernelBuffer);
4962   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&kernelWidth);
4963   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageColumns);
4964   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_uint),(void *)&imageRows);
4965   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_float4)*(8 * (32 + kernelWidth)),(void *) NULL);
4966   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fGain);
4967   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(float),(void *)&fThreshold);
4968   status|=SetOpenCLKernelArg(unsharpMaskKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
4969   if (status != CL_SUCCESS)
4970   {
4971     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
4972       ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
4973     goto cleanup;
4974   }
4975 
4976   gsize[0]=((image->columns + 7) / 8)*8;
4977   gsize[1]=((image->rows + 31) / 32)*32;
4978   lsize[0]=8;
4979   lsize[1]=32;
4980   outputReady=EnqueueOpenCLKernel(queue,unsharpMaskKernel,2,(const size_t *) NULL,
4981     gsize,lsize,image,filteredImage,MagickFalse,exception);
4982 
4983 cleanup:
4984 
4985   if (imageBuffer != (cl_mem) NULL)
4986     ReleaseOpenCLMemObject(imageBuffer);
4987   if (filteredImageBuffer != (cl_mem) NULL)
4988     ReleaseOpenCLMemObject(filteredImageBuffer);
4989   if (imageKernelBuffer != (cl_mem) NULL)
4990     ReleaseOpenCLMemObject(imageKernelBuffer);
4991   if (unsharpMaskKernel != (cl_kernel) NULL)
4992     ReleaseOpenCLKernel(unsharpMaskKernel);
4993   if (queue != (cl_command_queue) NULL)
4994     ReleaseOpenCLCommandQueue(device,queue);
4995   if (device != (MagickCLDevice) NULL)
4996     ReleaseOpenCLDevice(device);
4997   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
4998     filteredImage=DestroyImage(filteredImage);
4999 
5000   return(filteredImage);
5001 }
5002 
AccelerateUnsharpMaskImage(const Image * image,const double radius,const double sigma,const double gain,const double threshold,ExceptionInfo * exception)5003 MagickPrivate Image *AccelerateUnsharpMaskImage(const Image *image,
5004   const double radius,const double sigma,const double gain,
5005   const double threshold,ExceptionInfo *exception)
5006 {
5007   Image
5008     *filteredImage;
5009 
5010   MagickCLEnv
5011     clEnv;
5012 
5013   assert(image != NULL);
5014   assert(exception != (ExceptionInfo *) NULL);
5015 
5016   if (checkAccelerateCondition(image) == MagickFalse)
5017     return((Image *) NULL);
5018 
5019   clEnv=getOpenCLEnvironment(exception);
5020   if (clEnv == (MagickCLEnv) NULL)
5021     return((Image *) NULL);
5022 
5023   if (radius < 12.1)
5024     filteredImage=ComputeUnsharpMaskImageSingle(image,clEnv,radius,sigma,gain,
5025       threshold,exception);
5026   else
5027     filteredImage=ComputeUnsharpMaskImage(image,clEnv,radius,sigma,gain,
5028       threshold,exception);
5029   return(filteredImage);
5030 }
5031 
ComputeWaveletDenoiseImage(const Image * image,MagickCLEnv clEnv,const double threshold,ExceptionInfo * exception)5032 static Image *ComputeWaveletDenoiseImage(const Image *image,MagickCLEnv clEnv,
5033   const double threshold,ExceptionInfo *exception)
5034 {
5035   cl_command_queue
5036     queue;
5037 
5038   const cl_int
5039     PASSES=5;
5040 
5041   const int
5042     TILESIZE=64,
5043     PAD=1<<(PASSES-1),
5044     SIZE=TILESIZE-2*PAD;
5045 
5046   cl_float
5047     thresh;
5048 
5049   cl_int
5050     status;
5051 
5052   cl_kernel
5053     denoiseKernel;
5054 
5055   cl_mem
5056     filteredImageBuffer,
5057     imageBuffer;
5058 
5059   cl_uint
5060     number_channels,
5061     width,
5062     height,
5063     max_channels;
5064 
5065   Image
5066     *filteredImage;
5067 
5068   MagickBooleanType
5069     outputReady;
5070 
5071   MagickCLDevice
5072     device;
5073 
5074   size_t
5075     goffset[2],
5076     gsize[2],
5077     i,
5078     lsize[2],
5079     passes,
5080     x;
5081 
5082   filteredImage=NULL;
5083   imageBuffer=NULL;
5084   filteredImageBuffer=NULL;
5085   denoiseKernel=NULL;
5086   queue=NULL;
5087   outputReady=MagickFalse;
5088 
5089   device=RequestOpenCLDevice(clEnv);
5090   /* Work around an issue on low end Intel devices */
5091   if (strcmp("Intel(R) HD Graphics",device->name) == 0)
5092     goto cleanup;
5093   queue=AcquireOpenCLCommandQueue(device);
5094   filteredImage=CloneImage(image,0,0,MagickTrue,
5095     exception);
5096   if (filteredImage == (Image *) NULL)
5097     goto cleanup;
5098   if (filteredImage->number_channels != image->number_channels)
5099     goto cleanup;
5100   imageBuffer=GetAuthenticOpenCLBuffer(image,device,exception);
5101   if (imageBuffer == (cl_mem) NULL)
5102     goto cleanup;
5103   filteredImageBuffer=GetAuthenticOpenCLBuffer(filteredImage,device,exception);
5104   if (filteredImageBuffer == (cl_mem) NULL)
5105     goto cleanup;
5106 
5107   denoiseKernel=AcquireOpenCLKernel(device,"WaveletDenoise");
5108   if (denoiseKernel == (cl_kernel) NULL)
5109   {
5110     (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5111       ResourceLimitWarning,"AcquireOpenCLKernel failed.",".");
5112     goto cleanup;
5113   }
5114 
5115   number_channels=(cl_uint)image->number_channels;
5116   width=(cl_uint)image->columns;
5117   height=(cl_uint)image->rows;
5118   max_channels=number_channels;
5119   if ((max_channels == 4) || (max_channels == 2))
5120     max_channels=max_channels-1;
5121   thresh=threshold;
5122   passes=(((1.0f*image->columns)*image->rows)+1999999.0f)/2000000.0f;
5123   passes=(passes < 1) ? 1 : passes;
5124 
5125   i=0;
5126   status =SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&imageBuffer);
5127   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_mem),(void *)&filteredImageBuffer);
5128   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&number_channels);
5129   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&max_channels);
5130   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_float),(void *)&thresh);
5131   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_int),(void *)&PASSES);
5132   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&width);
5133   status|=SetOpenCLKernelArg(denoiseKernel,i++,sizeof(cl_uint),(void *)&height);
5134   if (status != CL_SUCCESS)
5135     {
5136       (void) OpenCLThrowMagickException(device,exception,GetMagickModule(),
5137         ResourceLimitWarning,"SetOpenCLKernelArg failed.",".");
5138       goto cleanup;
5139     }
5140 
5141   for (x = 0; x < passes; ++x)
5142   {
5143     gsize[0]=((width+(SIZE-1))/SIZE)*TILESIZE;
5144     gsize[1]=((((height+(SIZE-1))/SIZE)+passes-1)/passes)*4;
5145     lsize[0]=TILESIZE;
5146     lsize[1]=4;
5147     goffset[0]=0;
5148     goffset[1]=x*gsize[1];
5149 
5150     outputReady=EnqueueOpenCLKernel(queue,denoiseKernel,2,goffset,gsize,lsize,
5151       image,filteredImage,MagickTrue,exception);
5152     if (outputReady == MagickFalse)
5153       break;
5154   }
5155 
5156 cleanup:
5157 
5158   if (imageBuffer != (cl_mem) NULL)
5159     ReleaseOpenCLMemObject(imageBuffer);
5160   if (filteredImageBuffer != (cl_mem) NULL)
5161     ReleaseOpenCLMemObject(filteredImageBuffer);
5162   if (denoiseKernel != (cl_kernel) NULL)
5163     ReleaseOpenCLKernel(denoiseKernel);
5164   if (queue != (cl_command_queue) NULL)
5165     ReleaseOpenCLCommandQueue(device,queue);
5166   if (device != (MagickCLDevice) NULL)
5167     ReleaseOpenCLDevice(device);
5168   if ((outputReady == MagickFalse) && (filteredImage != (Image *) NULL))
5169     filteredImage=DestroyImage(filteredImage);
5170 
5171   return(filteredImage);
5172 }
5173 
AccelerateWaveletDenoiseImage(const Image * image,const double threshold,ExceptionInfo * exception)5174 MagickPrivate Image *AccelerateWaveletDenoiseImage(const Image *image,
5175   const double threshold,ExceptionInfo *exception)
5176 {
5177   Image
5178     *filteredImage;
5179 
5180   MagickCLEnv
5181     clEnv;
5182 
5183   assert(image != NULL);
5184   assert(exception != (ExceptionInfo *)NULL);
5185 
5186   if (checkAccelerateCondition(image) == MagickFalse)
5187     return((Image *) NULL);
5188 
5189   clEnv=getOpenCLEnvironment(exception);
5190   if (clEnv == (MagickCLEnv) NULL)
5191     return((Image *) NULL);
5192 
5193   filteredImage=ComputeWaveletDenoiseImage(image,clEnv,threshold,exception);
5194 
5195   return(filteredImage);
5196 }
5197 #endif /* MAGICKCORE_OPENCL_SUPPORT */
5198