android-12.0.0_r2/s

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#ifndef __DATAGEN_H
#define __DATAGEN_H

#include "harness/compat.h"

#include <assert.h>

#include <string>
#include <memory>
#include <vector>
#include <map>
#include <fstream>
#include <algorithm>

#include "harness/mt19937.h"

#include "exceptions.h"
#include "kernelargs.h"

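// ESINNS is a short name for EXPLICIT_SPECIALIZATION_IN_NON_NAMESPACE_SCOPE.
// When it is defined (GCC-compatible compilers) the RandomGenerator class is
// closed before its getNext specializations, which are then emitted at
// namespace scope as "inline ... RandomGenerator::getNext"; otherwise the
// specializations stay inside the class body.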

#undef ESINNS

#ifdef __GNUC__

#define ESINNS
#define ESINNS_PREF() inline
#define ESINNS_POST() RandomGenerator::

#else

#define ESINNS_PREF()
#define ESINNS_POST()

#endif

#define MAX_WORK_DIM        3
#define GLOBAL_WORK_SIZE    (((CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(cl_double) / 16) / 2) * 2)            // max buffer size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE / sizeof(double16)

// SPIR definitions for image channel data types (Section 2.1.3.2).
#define SPIR_CLK_SNORM_INT8         0x10D0
#define SPIR_CLK_SNORM_INT16        0x10D1
#define SPIR_CLK_UNORM_INT8         0x10D2
#define SPIR_CLK_UNORM_INT16        0x10D3
#define SPIR_CLK_UNORM_SHORT_565    0x10D4
#define SPIR_CLK_UNORM_SHORT_555    0x10D5
#define SPIR_CLK_UNORM_SHORT_101010 0x10D6
#define SPIR_CLK_SIGNED_INT8        0x10D7
#define SPIR_CLK_SIGNED_INT16       0x10D8
#define SPIR_CLK_SIGNED_INT32       0x10D9
#define SPIR_CLK_UNSIGNED_INT8      0x10DA
#define SPIR_CLK_UNSIGNED_INT16     0x10DB
#define SPIR_CLK_UNSIGNED_INT32     0x10DC
#define SPIR_CLK_HALF_FLOAT         0x10DD
#define SPIR_CLK_FLOAT              0x10DE
#define SPIR_CLK_UNORM_INT24        0x10DF

#define NUM_IMG_FORMATS 64

// Random-value helpers driven by an MTdata generator state. Only the
// declarations are visible here; the definitions live in another file.
// NOTE(review): presumably each call returns a value between low and high -
// confirm whether the bounds are inclusive against the implementation.
double get_random_double(double low, double high, MTdata d);
float get_random_float(float low, float high, MTdata d);
size_t get_random_size_t(size_t low, size_t high, MTdata d);

/**
 Simple container for the work size information
 (dimension count plus per-dimension offset, global size and local size).
 The class declares no constructor, so the fields hold whatever the creator
 assigns to them.
 */
class WorkSizeInfo
{
public:
    /**
      Returns the flat global size
      (defined out of line).
      */
    size_t getGlobalWorkSize() const;
public:
    // Number of dimensions in use; presumably at most MAX_WORK_DIM - confirm.
    cl_uint work_dim;
    // Per-dimension global offset.
    size_t  global_work_offset[MAX_WORK_DIM];
    // Per-dimension global size.
    size_t  global_work_size[MAX_WORK_DIM];
    // Per-dimension local (work-group) size.
    size_t  local_work_size[MAX_WORK_DIM];
};

/**
 Generates various types of random numbers.

 The object owns one MTdata state: it is created by init() and released by the
 destructor through free_mtdata(). The class defines no copy operations, so a
 copy would share the same state and both destructors would free it; use a
 single instance (see gRG) and pass it by reference.

 getNext<T>(low, high) is specialized for bool, the cl_* integer types,
 cl_float and cl_double; any other T hits the "Not implemented" assertion.
 Where the specializations are placed (inside or outside the class body) is
 controlled by the ESINNS macros defined at the top of this header.
 */
class RandomGenerator
{
public:
    /** Creates the generator seeded with 0. */
    RandomGenerator():m_d(NULL)
    {
       init(0);
    }

    /** Releases the generator state, if any. */
    ~RandomGenerator()
    {
        if( NULL != m_d )
            free_mtdata(m_d);
    }

    /**
     (Re)seeds the generator.
     A state created by an earlier call (the constructor always makes one) is
     released first, so re-seeding does not leak it.
     */
    void init(cl_uint seed)
    {
        if( NULL != m_d )
        {
            free_mtdata(m_d);
            m_d = NULL;
        }
        m_d = init_genrand( seed );
    }

    /**
     Generic fallback: asserts and returns a value-initialized T.
     Only the explicit specializations below produce random values.
     */
    template<class T> T getNext(T low, T high)
    {
        assert(false && "Not implemented");
        return T();
    }

#ifdef ESINNS

private:
    MTdata m_d;
};

#endif

// Integer-like types are routed through get_random_size_t after casting both
// bounds to size_t; the result is cast back to the requested type.

template<> ESINNS_PREF() bool ESINNS_POST()getNext(bool low, bool high)
{
    return (bool)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_char ESINNS_POST()getNext(cl_char low, cl_char high)
{
    return (cl_char)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_double ESINNS_POST()getNext(cl_double low, cl_double high)
{
    return get_random_double(low, high, m_d);
}

template<> ESINNS_PREF() cl_float ESINNS_POST()getNext(cl_float low, cl_float high)
{
    return get_random_float(low, high, m_d);
}

template<> ESINNS_PREF() cl_int ESINNS_POST()getNext(cl_int low, cl_int high)
{
    return (cl_int)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_long ESINNS_POST()getNext(cl_long low, cl_long high)
{
    return (cl_long)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_short ESINNS_POST()getNext(cl_short low, cl_short high)
{
    return (cl_short)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_uchar ESINNS_POST()getNext(cl_uchar low, cl_uchar high)
{
    return (cl_uchar)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_uint ESINNS_POST()getNext(cl_uint low, cl_uint high)
{
    return (cl_uint)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_ulong ESINNS_POST()getNext(cl_ulong low, cl_ulong high)
{
    return (cl_ulong)get_random_size_t((size_t)low, (size_t)high, m_d);
}

template<> ESINNS_PREF() cl_ushort ESINNS_POST()getNext(cl_ushort low, cl_ushort high)
{
    return (cl_ushort)get_random_size_t((size_t)low, (size_t)high, m_d);
}

#ifndef ESINNS

private:
    MTdata m_d;
};

#endif

extern RandomGenerator gRG;

/**
 Abstract interface shared by all kernel argument generators.
 Only derived classes can construct it; the single operation, generate(),
 returns a newly produced KernelArg for the described argument.
 */
class KernelArgGenerator
{
protected:
    // Construction is reserved for concrete generators.
    KernelArgGenerator() {}

public:
    /**
     Produces the argument described by argInfo.
     refArg, when supplied, is a previously generated argument that
     implementations in this file copy their data from instead of drawing
     fresh random values.
     */
    virtual KernelArg* generate(cl_context context,
                                const WorkSizeInfo& ws,
                                const KernelArgInfo& argInfo,
                                const KernelArg* refArg,
                                const cl_kernel kernel,
                                const cl_device_id device) = 0;

    // Virtual so generators can be destroyed through a base pointer.
    virtual ~KernelArgGenerator() {}
};

/**
 Mock: 'Not implemented' version of the kernel argument generator - used for the still unsupported types
 */
class KernelArgGeneratorNI: public KernelArgGenerator
{
public:
    KernelArgGeneratorNI( bool isBuffer, size_t vectorSize, int minValue, int maxValue )
    {}

    KernelArg* generate( cl_context context,
                         const WorkSizeInfo& ws,
                         const KernelArgInfo& argInfo,
                         const KernelArg* refArg,
                         const cl_kernel kernel,
                         const cl_device_id device )
    {
        //assert(false && "Not implemented");
        throw Exceptions::TestError("KernelArgGenerator is not implemented\n");
    }
};

/**
 Kernel argument generator for images
 */
class KernelArgGeneratorImage: public KernelArgGenerator
{
public:
    KernelArgGeneratorImage(bool isBuffer, size_t vectorSize, char minValue, char maxValue) :
        m_isBuffer(isBuffer),
        m_vectorSize(vectorSize),
        m_minValue(minValue),
        m_maxValue(maxValue)
    {
        m_format.image_channel_order = CL_RGBA;

        m_desc.image_width = 32;
        m_desc.image_height = 1;
        m_desc.image_depth = 1;
        m_desc.image_array_size = 1;
        m_desc.num_mip_levels = 0;
        m_desc.num_samples = 0;
        m_desc.buffer = NULL;
    }

    bool isValidChannelOrder(cl_context context, cl_channel_order order) const
    {
        cl_mem_flags flags = CL_MEM_COPY_HOST_PTR;
        cl_uint actualNumFormats = 0;
        cl_image_format imgFormat = m_format;
        imgFormat.image_channel_order = order;

        cl_int error = clGetSupportedImageFormats(
            context,
            flags,
            m_desc.image_type,
            0,
            NULL,
            &actualNumFormats);
        if (CL_SUCCESS != error)
            throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error);

        std::vector<cl_image_format> supportedFormats(actualNumFormats);
        error = clGetSupportedImageFormats(context, flags, m_desc.image_type,
                                           actualNumFormats,
                                           supportedFormats.data(), NULL);
        if (CL_SUCCESS != error)
            throw Exceptions::TestError("clGetSupportedImageFormats failed\n", error);

        for (size_t i=0; i<actualNumFormats; ++i)
        {
            cl_image_format curFormat = supportedFormats[i];

            if(imgFormat.image_channel_order == curFormat.image_channel_order &&
               imgFormat.image_channel_data_type == curFormat.image_channel_data_type)
               return true;
        }

        return false;
    }

    void setChannelOrder(cl_channel_order order)
    {
        m_format.image_channel_order = order;
    }

    KernelArg* generate(cl_context context,
                        const WorkSizeInfo& ws,
                        const KernelArgInfo& argInfo,
                        const KernelArg* refArg,
                        const cl_kernel kernel,
                        const cl_device_id device)
    {
        void * pBuffer = NULL;
        size_t numPixels = m_desc.image_width * m_desc.image_height * m_desc.image_depth * m_desc.image_array_size;
        const int alignment = sizeof(cl_int) * 4 ; //RGBA channel size * sizeof (cl_int)
        size_t allocSize = numPixels * alignment ;

        cl_kernel_arg_access_qualifier accessQ = argInfo.getAccessQualifier();

        cl_mem_flags mem_flags = 0;

        if (accessQ == CL_KERNEL_ARG_ACCESS_READ_ONLY)
        {
            mem_flags |=  CL_MEM_READ_ONLY;
        }

        if (accessQ == CL_KERNEL_ARG_ACCESS_WRITE_ONLY)
        {
            mem_flags |=  CL_MEM_WRITE_ONLY;
        }

        if (accessQ == CL_KERNEL_ARG_ACCESS_READ_WRITE)
        {
            mem_flags |=  CL_MEM_READ_WRITE;
        }

        pBuffer = align_malloc(allocSize, alignment);
        if (NULL == pBuffer)
        {
            throw Exceptions::TestError("align_malloc failed for image\n", 1);
        }
        assert( (size_t)pBuffer % alignment == 0 );
        if (NULL == refArg)
        {
            fillBuffer((cl_char *)pBuffer, allocSize );
        }
        else {
            memcpy(pBuffer, refArg->getBuffer(), allocSize );
        }

        return new KernelArgImage(context, argInfo, pBuffer, allocSize, mem_flags, m_format, m_desc);
    }

protected:
    KernelArgGeneratorImage()
    {}

    void fillBuffer( cl_char * ptr, size_t nelem)
    {
        for( size_t i = 0; i < nelem; ++i )
        {
            ptr[i]  = gRG.getNext<cl_char>(m_minValue, m_maxValue);
        }
    }

protected:
    bool m_isBuffer;
    size_t m_vectorSize;
    cl_char m_minValue;
    cl_char m_maxValue;
    cl_image_format m_format;
    cl_image_desc m_desc;
};

/**
 Kernel argument generator for image1d_array.
 The template argument selects the channel data type of the generated image.
 */
template<cl_channel_type channel_type> class KernelArgGeneratorImage1dArray: public KernelArgGeneratorImage
{
public:
    KernelArgGeneratorImage1dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ):
        KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
    {
        m_format.image_channel_data_type = channel_type;
        m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY;

        // A row is image_width pixels of 4 channels (RGBA) * 4 bytes (cl_int) each.
        const size_t rowPitch = m_desc.image_width * 4 * 4;
        m_desc.image_row_pitch = rowPitch;

        // One slice spans image_height rows.
        m_desc.image_slice_pitch = m_desc.image_height * rowPitch;
    }
};

/**
 Kernel argument generator for image1d_buffer.
 The template argument selects the channel data type of the generated image.
 */
template<cl_channel_type channel_type> class KernelArgGeneratorImage1dBuffer: public KernelArgGeneratorImage
{
public:
    KernelArgGeneratorImage1dBuffer( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
        KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
    {
        m_format.image_channel_data_type = channel_type;
        m_desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER;

        // Slice pitch of 1d images should be zero, see
        // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf 5.2.2.
        m_desc.image_slice_pitch = 0;

        // A row is image_width pixels of 4 channels (RGBA) * 4 bytes (cl_int) each.
        m_desc.image_row_pitch = m_desc.image_width * 4 * 4;
    }
};

/**
 Kernel argument generator for image1d.
 The template argument selects the channel data type of the generated image.
 */
template<cl_channel_type channel_type> class KernelArgGeneratorImage1d: public KernelArgGeneratorImage
{
public:
    KernelArgGeneratorImage1d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
        KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
    {
        m_format.image_channel_data_type = channel_type;
        m_desc.image_type = CL_MEM_OBJECT_IMAGE1D;

        // Slice pitch is not applicable for one-dimensional images, see
        // '5.3.1.2 image descriptor' in
        // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf
        m_desc.image_slice_pitch = 0;

        // A row is image_width pixels of 4 channels (RGBA) * 4 bytes (cl_int) each.
        m_desc.image_row_pitch = m_desc.image_width * 4 * 4;
    }
};

/**
 Kernel argument generator for image2d_array:
 eight slices of 32 rows each, on top of the base class defaults.
 The template argument selects the channel data type of the generated image.
 */
template<cl_channel_type channel_type> class KernelArgGeneratorImage2dArray: public KernelArgGeneratorImage
{
public:
    KernelArgGeneratorImage2dArray( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
        KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
    {
        m_format.image_channel_data_type = channel_type;
        m_desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY;

        m_desc.image_array_size = 8;
        m_desc.image_height = 32;

        // A row is image_width pixels of 4 channels (RGBA) * 4 bytes (cl_int) each.
        const size_t rowPitch = m_desc.image_width * 4 * 4;
        m_desc.image_row_pitch = rowPitch;

        // One slice spans image_height rows.
        m_desc.image_slice_pitch = m_desc.image_height * rowPitch;
    }
};

/**
 Kernel argument generator for image2d (32 rows on top of the base defaults).
 The template argument selects the channel data type of the generated image.
 */
template<cl_channel_type channel_type> class KernelArgGeneratorImage2d: public KernelArgGeneratorImage
{
public:
    KernelArgGeneratorImage2d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
        KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
    {
        m_format.image_channel_data_type = channel_type;
        m_desc.image_type = CL_MEM_OBJECT_IMAGE2D;

        m_desc.image_height = 32;

        // Slice pitch is not applicable for two-dimensional images, see
        // '5.3.1.2 image descriptor' in
        // http://www.khronos.org/registry/cl/specs/opencl-1.2.pdf
        m_desc.image_slice_pitch = 0;

        // A row is image_width pixels of 4 channels (RGBA) * 4 bytes (cl_int) each.
        m_desc.image_row_pitch = m_desc.image_width * 4 * 4;
    }
};

/**
 Kernel argument generator for image3d:
 32 rows and a depth of 8 on top of the base class defaults.
 The template argument selects the channel data type of the generated image.
 */
template<cl_channel_type channel_type> class KernelArgGeneratorImage3d: public KernelArgGeneratorImage
{
public:
    KernelArgGeneratorImage3d( bool isBuffer, size_t vectorSize, char minValue, char maxValue ) :
        KernelArgGeneratorImage(isBuffer, vectorSize, minValue, maxValue)
    {
        m_format.image_channel_data_type = channel_type;
        m_desc.image_type = CL_MEM_OBJECT_IMAGE3D;

        m_desc.image_depth = 8;
        m_desc.image_height = 32;

        // A row is image_width pixels of 4 channels (RGBA) * 4 bytes (cl_int) each.
        const size_t rowPitch = m_desc.image_width * 4 * 4;
        m_desc.image_row_pitch = rowPitch;

        // One slice spans image_height rows.
        m_desc.image_slice_pitch = m_desc.image_height * rowPitch;
    }
};

/**
 Kernel argument generator for samplers.
 Keeps the three sampler properties (normalized coordinates, addressing mode,
 filter mode) and builds a KernelArgSampler from them on request.
 */
class KernelArgGeneratorSampler: public KernelArgGenerator
{
public:
    // Same parameter list as the other generators; defined out of line.
    KernelArgGeneratorSampler(bool isBuffer, size_t vectorSize, int minValue, int maxValue);

    KernelArgGeneratorSampler();

    /*
     * Setters for the individual sampler properties (defined out of line).
     */
    void setNormalized(cl_bool);
    void setAddressingMode(cl_addressing_mode);
    void setFiterMode(cl_filter_mode);

    /**
     Returns a new KernelArgSampler created in the given context from the
     currently stored properties. The remaining parameters are not used.
     */
    KernelArg* generate(cl_context context,
                        const WorkSizeInfo& ws,
                        const KernelArgInfo& argInfo,
                        const KernelArg* refArg,
                        const cl_kernel kernel,
                        const cl_device_id device)
    {
        KernelArg* samplerArg =
            new KernelArgSampler(context, m_normalized, m_addressingMode, m_filterMode);
        return samplerArg;
    }

private:
    void initToDefaults();

    cl_bool m_normalized;
    cl_addressing_mode m_addressingMode;
    cl_filter_mode m_filterMode;
};

/*
 * Generates all the possible values for image samplers.
 * The iteration state is three indices (normalization, filter mode and
 * addressing mode); presumably they select entries of the static tables
 * declared at the bottom of the class - confirm against the out-of-line
 * definitions.
 */
class SamplerValuesGenerator
{
public:
    class iterator {
        friend class SamplerValuesGenerator;

        // Current position in each of the three property dimensions.
        size_t m_normIndex, m_filterIndex, m_addressingModeIndex;

        // Private: only SamplerValuesGenerator builds positioned iterators.
        iterator(size_t norm, size_t filter, size_t addressing);

        bool incrementIndex(size_t &i, const size_t limit);
    public:
        iterator();

        /*
         * Moves the iterator to the next sampler value.
         */
        iterator& operator ++();

        bool operator == (const iterator& other) const;

        bool operator != (const iterator& other) const;

        cl_bool getNormalized() const;

        cl_filter_mode getFilterMode() const;

        cl_addressing_mode getAddressingMode() const;

        /*
         * Converts the value of the sampler to a bitmask representation.
         */
        unsigned toBitmap() const;

        /*
         * Returns a string representation of the sampler.
         */
        std::string toString() const;
    };

    // The first position is a default-constructed iterator.
    iterator begin() { return iterator(); }

    iterator end();

    // Value tables, defined out of line.
    static cl_bool coordNormalizations[];
    static cl_filter_mode filterModes[];
    static cl_addressing_mode addressingModes[];
};

// Host-side structure types handled by KernelStructTypeArgGenerator.
// NOTE(review): presumably each mirrors a structure declared in the test
// kernels - confirm the layouts match on the device side.

// Four floats followed by one int.
typedef struct struct_type {
    cl_float float4d[4];
    cl_int intd;
} typedef_struct_type;

// Image width and channel information, with the expected channel values.
typedef struct {
    cl_int width;
    cl_int channelType;
    cl_int channelOrder;
    cl_int expectedChannelType;
    cl_int expectedChannelOrder;
 } image_kernel_data;

// Sixteen doubles.
typedef struct testStruct {
     cl_double vec[16];
 } testStruct;

// Work-item query results, three entries per dimensioned quantity.
typedef struct {
     cl_uint workDim;
     cl_uint globalSize[3];
     cl_uint globalID[3];
     cl_uint localSize[3];
     cl_uint localID[3];
     cl_uint numGroups[3];
     cl_uint groupID[3];
  } work_item_data;

/**
 Kernel argument generator for the structure types declared above. The
 template is written for T being one of:

 - "struct_type" (typedef_struct_type)
 - "image_kernel_data"
 - "testStruct"
   Since there are many "testStruct", we define it to have maximum space
   Also the alignment is done following the "worst" case
 - "work_item_data"

 The per-type pieces (type name, fill routine, size and alignment) are private
 overloads selected by the static type of a T* argument.
 */

  template<typename T> class KernelStructTypeArgGenerator: public KernelArgGenerator
  {

  public:
      // minValue and maxValue are accepted but not used; the size and
      // alignment are computed in generate().
      KernelStructTypeArgGenerator( bool isBuffer, size_t vectorSize, cl_int minValue, cl_int maxValue ):
          m_isBuffer(isBuffer),
          m_vectorSize(vectorSize),
          m_alignment(0),
          m_size(0)
      {}

      /**
       Builds the argument for one structure (KernelArg) or, when the
       generator was created with isBuffer, for an array holding one
       structure per work item of ws (KernelArgBuffer).
       The data is random unless refArg is given, in which case it is copied
       from refArg's buffer.
       Throws Exceptions::TestError when the allocation fails.
       */
      KernelArg* generate( cl_context context,
                                 const WorkSizeInfo& ws,
                                 const KernelArgInfo& argInfo,
                                 const KernelArg* refArg,
                                 const cl_kernel kernel,
                                 const cl_device_id device )
      {
          T *pStruct = NULL;

          // pStruct is still NULL here: only its type matters, it picks the overload.
          calcSizeAndAlignment(pStruct);
          size_t size = m_size;

          if( m_isBuffer )
          {
              cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier();

              // NOTE(review): CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE is normally the
              // clGetDeviceInfo parameter name rather than a byte count - confirm
              // this comparison is intended. The size assigned here is replaced
              // by the assignment in the next block, which also runs for
              // constant arguments.
              if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
              {
                  if ( (CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size < m_size )
                      size=(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE/m_size)*m_size;
              }

              // Only global and constant arguments get host data; for any other
              // address space pStruct stays NULL and size stays m_size.
              if( CL_KERNEL_ARG_ADDRESS_GLOBAL   == addrQ ||
                  CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
              {
                  size_t no_e = ws.getGlobalWorkSize();
                  size = no_e * m_size;
                  pStruct = (T *)align_malloc(size, m_alignment);
                  if (NULL == pStruct)
                  {
                       throwExceptions(pStruct);
                  }
                  assert( (size_t)pStruct % m_alignment == 0 );
                   if (NULL == refArg)
                  {
                      fillBuffer(pStruct, no_e);
                  }
                  else {
                      memcpy(pStruct, refArg->getBuffer(), size);
                  }
              }
              return new KernelArgBuffer( context, argInfo, (void*)pStruct, size);
          }
          else {
              // By-value argument: exactly one structure.
              pStruct = (T *)align_malloc(m_size, m_alignment);
              if (NULL == pStruct)
              {
                   throwExceptions(pStruct);
              }
              assert( (size_t)pStruct % m_alignment == 0 );
              if (NULL == refArg)
              {
                  fillBuffer(pStruct, 1);
              }
              else {
                  memcpy(pStruct, refArg->getBuffer(), m_size);
              }

              return new KernelArg( argInfo, (void*)pStruct, m_size);
          }
      }
  private:

      // Type name used in error messages; the pointer value is never read.

      std::string getTypeString(typedef_struct_type *pStruct)
      {
          return "typedef_struct_type";
      }

      std::string getTypeString(image_kernel_data *pStruct)
      {
          return "image_kernel_data";
      }

      std::string getTypeString(testStruct *pStruct)
      {
          return "testStruct";
      }

      std::string getTypeString(work_item_data *pStruct)
      {
          return "work_item_data";
      }

      // Reports an allocation failure for T; always throws Exceptions::TestError.
      void throwExceptions(T * pStruct)
      {
          std::string str = "align_malloc failed for " ;
          if (m_isBuffer)
              str += "array of " ;
          str += getTypeString(pStruct) ;
          throw Exceptions::TestError(str, 1);
      }

      // Fill routines: write no_e consecutive structures starting at pStruct.

      void fillBuffer( typedef_struct_type *pStruct, size_t no_e )
      {
          for (size_t e = 0; e < no_e; ++e)
          {
              for( size_t i = 0; i < 4; ++i )
              {
                  pStruct[e].float4d[i] = gRG.getNext<cl_float>(-0x01000000, 0x01000000);
              }
              pStruct[e].intd = gRG.getNext<cl_int>(0, 0x7fffffff);
          }
      }

      void fillBuffer( image_kernel_data *pStruct, size_t no_e )
      {
          for (size_t e = 0; e < no_e; ++e)
          {
              pStruct[e].width = gRG.getNext<cl_int>(0, 0x7fffffff);
              pStruct[e].channelType = gRG.getNext<cl_int>(0, 0x7fffffff);
              pStruct[e].channelOrder = gRG.getNext<cl_int>(0, 0x7fffffff);
              pStruct[e].expectedChannelType = gRG.getNext<cl_int>(0, 0x7fffffff);
              pStruct[e].expectedChannelOrder = gRG.getNext<cl_int>(0, 0x7fffffff);
          }
      }

      // The cl_double elements are filled with values drawn as cl_float.
      void fillBuffer( testStruct *pStruct, size_t no_e )
      {
          for (size_t e = 0; e < no_e; ++e)
          {
              for( size_t i = 0; i < 16; ++i )
              {
                  pStruct[e].vec[i] = gRG.getNext<cl_float>(-0x01000000, 0x01000000);
               }
          }
      }

      // work_item_data is zeroed rather than randomized.
      void fillBuffer( work_item_data *pStruct, size_t no_e )
      {
          for (size_t e = 0; e < no_e; ++e)
          {
              memset(&pStruct[e], 0, sizeof(work_item_data));
          }
      }

      // structure alignment is derived from the size of the larger field in it
      // size of the structure is the size of the largest field multiple by the number of fields

      // Here the size is two float4-sized slots rather than sizeof(typedef_struct_type).
      void calcSizeAndAlignment(typedef_struct_type *pStruct)
      {
          m_alignment = sizeof(cl_float) * 4;
          m_size = m_alignment * 2 ;
      }

      void calcSizeAndAlignment(image_kernel_data *pStruct)
      {
          m_alignment = sizeof(cl_int);
          m_size = sizeof(image_kernel_data) ;
      }

      void calcSizeAndAlignment(testStruct *pStruct)
      {
          m_alignment = sizeof(cl_double) * 16;
          m_size = m_alignment;
      }

      void calcSizeAndAlignment(work_item_data *pStruct)
      {
          m_alignment = sizeof(cl_uint);
          m_size = sizeof(work_item_data);
      }

  private:
      bool m_isBuffer;       // true: generate an array (KernelArgBuffer) instead of one structure
      size_t m_vectorSize;   // stored by the constructor; not read in this class
      int m_alignment;       // allocation alignment in bytes, set by calcSizeAndAlignment
      size_t m_size;         // bytes per structure, set by calcSizeAndAlignment
};

/**
 Kernel argument generator for the simple scalar and vector types.

 T is the element type and vectorSize the number of elements per value; a
 3-element vector is sized and aligned like a 4-element one. Random elements
 are drawn from gRG between minValue and maxValue.
 */
template <class T> class KernelArgGeneratorT: public KernelArgGenerator
{
public:
    KernelArgGeneratorT( bool isBuffer, size_t vectorSize, T minValue, T maxValue ):
        m_isBuffer(isBuffer),
        m_vectorSize(vectorSize),
        m_minValue(minValue),
        m_maxValue(maxValue)
    {}

    /**
     Builds the argument described by argInfo.

     For a buffer generator the result is a KernelArgBuffer sized for one
     (aligned) value per work item of ws; host data is allocated only for the
     global and constant address spaces, and a local buffer is limited to an
     equal share of the local memory the kernel leaves free. Otherwise the
     result is a KernelArg holding a single value.
     The data is random unless refArg is given, in which case it is copied
     from refArg's buffer.

     Throws Exceptions::TestError when a device/kernel query or the host
     allocation fails.
     */
    KernelArg* generate( cl_context context,
                         const WorkSizeInfo& ws,
                         const KernelArgInfo& argInfo,
                         const KernelArg* refArg,
                         const cl_kernel kernel,
                         const cl_device_id device  )
    {
        T* pBuffer = NULL;
        size_t size = 0;
        int alignment;
        int error = CL_SUCCESS;
        // Zero-initialized so that nothing indeterminate is ever read.
        cl_ulong totalDeviceLocalMem = 0;
        cl_ulong localMemUsedByKernel = 0;
        cl_uint numArgs = 0, numLocalArgs = 0;
        KernelArgInfo kernel_arg_info;

        // take care of 3-elements vector's alignment issue:
        // if 3-elements vector - the alignment is 4-elements
        if (m_vectorSize == 3)
            alignment = sizeof(T) * 4;
        else
            alignment = sizeof(T) * m_vectorSize;

        // gather information about the kernel and device
        error = clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(totalDeviceLocalMem), &totalDeviceLocalMem, NULL);
        if( error != CL_SUCCESS )
        {
            throw Exceptions::TestError("Unable to get device local memory size\n", error);
        }

        error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(localMemUsedByKernel), &localMemUsedByKernel, NULL);
        if( error != CL_SUCCESS )
        {
            throw Exceptions::TestError("Unable to get kernel local memory size\n", error);
        }

        error = clGetKernelInfo(kernel, CL_KERNEL_NUM_ARGS, sizeof(numArgs), &numArgs, NULL);
        if( error != CL_SUCCESS )
        {
            throw Exceptions::TestError("Unable to get number of kernel arguments\n", error);
        }

        // Calculate the number of local memory arguments
        for (cl_uint i = 0; i < numArgs; i ++)
        {
            // The returned size is not needed, so no size pointer is passed.
            error = clGetKernelArgInfo( kernel, i, CL_KERNEL_ARG_ADDRESS_QUALIFIER, sizeof(cl_kernel_arg_address_qualifier), kernel_arg_info.getAddressQualifierRef(), NULL);
            if( error != CL_SUCCESS )
            {
                throw Exceptions::TestError("Unable to get argument address qualifier\n", error);
            }

            if(kernel_arg_info.getAddressQualifier() == CL_KERNEL_ARG_ADDRESS_LOCAL)
            {
                numLocalArgs ++;
            }
        }

        // reduce the amount of local memory by the amount the kernel + implementation uses
        // (clamped at zero: the operands are unsigned and must not wrap around)
        if (localMemUsedByKernel < totalDeviceLocalMem)
            totalDeviceLocalMem -= localMemUsedByKernel;
        else
            totalDeviceLocalMem = 0;

        if( m_isBuffer )
        {
            cl_kernel_arg_address_qualifier addrQ = argInfo.getAddressQualifier();

            // decide about the buffer size - take into account the alignment and padding
            size = ws.getGlobalWorkSize() * alignment;

            // reduce the size of the buffer for local memory:
            // every local argument gets an equal share of what is left
            if (numLocalArgs && addrQ == CL_KERNEL_ARG_ADDRESS_LOCAL)
            {
                const cl_ulong localShare = totalDeviceLocalMem / numLocalArgs;
                if (size > localShare)
                {
                    size = (size_t)localShare;
                }
            }

            // NOTE(review): CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE is normally the
            // clGetDeviceInfo parameter name rather than a queried byte count;
            // kept as is because GLOBAL_WORK_SIZE is derived from the same
            // constant - confirm the intent.
            if( CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
            {
                if ( CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE < size )
                    size = CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE;
            }

            // Host data exists only for global and constant buffers; for other
            // address spaces pBuffer stays NULL.
            if( CL_KERNEL_ARG_ADDRESS_GLOBAL   == addrQ ||
                CL_KERNEL_ARG_ADDRESS_CONSTANT == addrQ )
            {
                pBuffer = (T *)align_malloc(size, alignment);
                if (NULL == pBuffer)
                {
                     throw Exceptions::TestError("align_malloc failed for array buffer\n", 1);
                }
                assert( (size_t)pBuffer % alignment == 0 );
                if (NULL == refArg)
                {
                    fillBuffer(pBuffer, size / sizeof(T));
                }
                else {
                    memcpy(pBuffer, refArg->getBuffer(), size);
                }
            }
            return new KernelArgBuffer( context, argInfo, (void*)pBuffer, size);
        }
        else
        {
            // Single value: same 3 -> 4 element rounding as for the alignment.
            if (m_vectorSize == 3)
                size = sizeof(T) * 4;
            else
                size = sizeof(T) * m_vectorSize;

            pBuffer = (T *)align_malloc(size, alignment);
            if (NULL == pBuffer)
            {
                throw Exceptions::TestError("align_malloc failed for pBuffer\n", 1);
            }
            assert( (size_t)pBuffer % alignment == 0 );
            if (NULL == refArg)
            {
                fillBuffer(pBuffer, m_vectorSize);
            }
            else {
                memcpy(pBuffer, refArg->getBuffer(), size);
            }
            return new KernelArg( argInfo, (void*)pBuffer, size);
        }
    }
private:
    // Writes nelem random elements drawn between m_minValue and m_maxValue.
    void fillBuffer( T* buffer, size_t nelem)
    {
        for( size_t i = 0; i < nelem; ++i )
        {
            buffer[i]  = gRG.getNext<T>(m_minValue, m_maxValue);
        }
    }

private:
    bool m_isBuffer;
    size_t m_vectorSize;
    T    m_minValue;
    T    m_maxValue;
};

/**
 Single entry point for producing kernel arguments: looks up the generator
 registered for an argument and lets it build the value.
 Instances cannot be created from outside (private constructor); callers go
 through getInstance().
 */
class DataGenerator
{
public:
    static DataGenerator* getInstance();

    ~DataGenerator();

    /**
     Generates the argument described by argInfo with the generator that
     getArgGenerator() returns for it. All other parameters are handed to
     that generator unchanged.
     */
    KernelArg* generateKernelArg(cl_context context,
                                 const KernelArgInfo& argInfo,
                                 const WorkSizeInfo& ws,
                                 const KernelArg* refArg,
                                 const cl_kernel kernel,
                                 const cl_device_id device)
    {
        return getArgGenerator(argInfo)->generate(context, ws, argInfo, refArg, kernel, device);
    }

    /*
     * Returns the generator registered for the given key.
     */
    KernelArgGenerator* getArgGenerator(const KernelArgInfo& argInfo);

    /*
     * Registers the given prototype generator under the given key.
     */
    void setArgGenerator(const KernelArgInfo& key, KernelArgGenerator* gen);

private:
    DataGenerator();

    static DataGenerator *Instance;

    // Registered generators, keyed by a string.
    typedef std::map<std::string, KernelArgGenerator*> ArgGeneratorsMap;
    ArgGeneratorsMap m_argGenerators;
};

/*
 * Enumerates image values through an iterator whose state is a pair of
 * indices (channel, image type); presumably they index the static
 * channelOrders and imageTypes tables declared below - confirm against the
 * out-of-line definitions.
 */
class ImageValuesGenerator
{
public:
    class iterator
    {
        friend class ImageValuesGenerator;
    public:
        /*
         * Iterator operators.
         */
        iterator& operator ++();
        bool operator == (const iterator&) const;
        bool operator != (const iterator&) const;
        /*
         * Returns the name of the basic image type (e.g., image2d_t).
         */
        std::string getImageTypeName() const;

        /*
         * Returns the name of the generator that generates images of this type
         * (e.g., imaget2d_float).
         */
        std::string getImageGeneratorName() const;

        /*
         * Returns the name of the generator that generates images of the 'base'
         * type (e.g., imaget2d_t).
         */
        std::string getBaseImageGeneratorName() const;

        /*
         * Returns the OpenCL enumeration for the channel order of the image
         * object this iterator creates.
         */
        int getOpenCLChannelOrder() const;

        /*
         * Returns the SPIR enumeration for the channel order of the image
         * object this iterator creates.
         */
        int getSPIRChannelOrder() const;

        /*
         * Returns the data type of the image object this iterator creates. (e.g.,
         * cl_float, cl_int).
         */
        int getDataType() const;

        /*
         * Returns the data type of the image object this iterator creates. (e.g.,
         * float, int), in string format.
         */
        std::string getDataTypeName() const;

        std::string toString() const;
    private:
        /*
         * Constructor for creating a 'begin' iterator.
         */
        iterator(ImageValuesGenerator*);
        /*
         * Constructor for creating an 'end' iterator.
         */
        iterator(int);
        /*
        * Increments the given argument up to the given limit.
        * In case the new value reaches the limit, the index is reset to hold zero.
        * Returns: true if the value of the index was incremented, false if it was reset
        * to zero.
        */
        bool incrementIndex(size_t& index, size_t limit);

        /*
         * Returns true if the index combination of this iterator is legal,
         * or false otherwise.
         */
        bool isLegalCombination() const;

        // Generator this iterator belongs to.
        ImageValuesGenerator* m_parent;
        // Current position in the channel and image-type dimensions.
        size_t m_channelIndex, m_imgTyIndex;
    }; //End iterator.

    iterator begin();
    iterator end();

    // Value tables, defined out of line.
    static cl_channel_order channelOrders[];
    static const char* imageTypes[];
private:
    WorkSizeInfo  m_wsInfo;
};

#endif