1#include <clc/clc.h>
2
/*
 * VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE)
 *
 * Defines the vload2, vload3, vload4, vload8 and vload16 overloads that read
 * PRIM_TYPE elements through a pointer into ADDR_SPACE.  vloadN(offset, x)
 * returns the N consecutive elements that start at x[N * offset].
 *
 * The loads go through "less_aligned" typedefs of the vector types whose
 * alignment is lowered to sizeof(PRIM_TYPE), so the source address is only
 * assumed to be aligned like a single scalar element.  vload3 reads its first
 * two elements as a less-aligned 2-vector and its third element as a scalar.
 */
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
  typedef PRIM_TYPE##2 less_aligned_##ADDR_SPACE##PRIM_TYPE##2 \
      __attribute__((aligned(sizeof(PRIM_TYPE)))); \
  typedef PRIM_TYPE##3 less_aligned_##ADDR_SPACE##PRIM_TYPE##3 \
      __attribute__((aligned(sizeof(PRIM_TYPE)))); \
  typedef PRIM_TYPE##4 less_aligned_##ADDR_SPACE##PRIM_TYPE##4 \
      __attribute__((aligned(sizeof(PRIM_TYPE)))); \
  typedef PRIM_TYPE##8 less_aligned_##ADDR_SPACE##PRIM_TYPE##8 \
      __attribute__((aligned(sizeof(PRIM_TYPE)))); \
  typedef PRIM_TYPE##16 less_aligned_##ADDR_SPACE##PRIM_TYPE##16 \
      __attribute__((aligned(sizeof(PRIM_TYPE)))); \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, \
                                             const ADDR_SPACE PRIM_TYPE *x) { \
    const ADDR_SPACE PRIM_TYPE *first = x + 2 * offset; \
    return *(const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 *)first; \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, \
                                             const ADDR_SPACE PRIM_TYPE *x) { \
    const ADDR_SPACE PRIM_TYPE *first = x + 3 * offset; \
    PRIM_TYPE##2 head = \
        *(const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##2 *)first; \
    return (PRIM_TYPE##3)(head.s0, head.s1, first[2]); \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, \
                                             const ADDR_SPACE PRIM_TYPE *x) { \
    const ADDR_SPACE PRIM_TYPE *first = x + 4 * offset; \
    return *(const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##4 *)first; \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, \
                                             const ADDR_SPACE PRIM_TYPE *x) { \
    const ADDR_SPACE PRIM_TYPE *first = x + 8 * offset; \
    return *(const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##8 *)first; \
  } \
\
  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, \
                                               const ADDR_SPACE PRIM_TYPE *x) { \
    const ADDR_SPACE PRIM_TYPE *first = x + 16 * offset; \
    return *(const ADDR_SPACE less_aligned_##ADDR_SPACE##PRIM_TYPE##16 *)first; \
  }
29
/*
 * VLOAD_ADDR_SPACES(PRIM_TYPE)
 *
 * Expands VLOAD_VECTORIZE for PRIM_TYPE once per address space, in the order
 * __private, __local, __constant, __global.
 */
#define VLOAD_ADDR_SPACES(PRIM_TYPE) \
  VLOAD_VECTORIZE(PRIM_TYPE, __private) \
  VLOAD_VECTORIZE(PRIM_TYPE, __local) \
  VLOAD_VECTORIZE(PRIM_TYPE, __constant) \
  VLOAD_VECTORIZE(PRIM_TYPE, __global)
35
/*
 * VLOAD_TYPES()
 *
 * Expands VLOAD_ADDR_SPACES for each element type that is handled
 * unconditionally: the signed and unsigned 8/16/32/64-bit integer types and
 * float.  double and half are instantiated separately, behind their
 * extension checks.
 */
#define VLOAD_TYPES() \
  VLOAD_ADDR_SPACES(char) \
  VLOAD_ADDR_SPACES(uchar) \
  VLOAD_ADDR_SPACES(short) \
  VLOAD_ADDR_SPACES(ushort) \
  VLOAD_ADDR_SPACES(int) \
  VLOAD_ADDR_SPACES(uint) \
  VLOAD_ADDR_SPACES(long) \
  VLOAD_ADDR_SPACES(ulong) \
  VLOAD_ADDR_SPACES(float)
46
/* Emit the vload2/3/4/8/16 definitions for every type listed in VLOAD_TYPES. */
VLOAD_TYPES()
48
/* The double overloads are emitted only when cl_khr_fp64 is defined; the
 * extension is enabled before the double vector types are used. */
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
    VLOAD_ADDR_SPACES(double)
#endif
/* Likewise, the half overloads are emitted only when cl_khr_fp16 is defined. */
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
    VLOAD_ADDR_SPACES(half)
#endif
57
/* The vload_half functions are legal even without cl_khr_fp16. */
/* vload_half is not provided for double. */
60#if __clang_major__ < 6
61float __clc_vload_half_float_helper__constant(const __constant half *);
62float __clc_vload_half_float_helper__global(const __global half *);
63float __clc_vload_half_float_helper__local(const __local half *);
64float __clc_vload_half_float_helper__private(const __private half *);
65
66#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
67#else
68#define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]);
69#endif
70
/*
 * VEC_LOADn(vec, addr_space), n = 2, 3, 4, 8, 16
 *
 * Fills all n components of vec by expanding to n consecutive VEC_LOAD1
 * steps.  The 2/4/8/16 variants recurse on the .lo half first and the .hi
 * half second; the 3-component variant names .s0, .s1 and .s2 directly.
 * addr_space is passed through unchanged to VEC_LOAD1.
 */
#define VEC_LOAD2(vec, addr_space)  \
  VEC_LOAD1(vec.lo, addr_space)     \
  VEC_LOAD1(vec.hi, addr_space)
#define VEC_LOAD3(vec, addr_space)  \
  VEC_LOAD1(vec.s0, addr_space)     \
  VEC_LOAD1(vec.s1, addr_space)     \
  VEC_LOAD1(vec.s2, addr_space)
#define VEC_LOAD4(vec, addr_space)  \
  VEC_LOAD2(vec.lo, addr_space)     \
  VEC_LOAD2(vec.hi, addr_space)
#define VEC_LOAD8(vec, addr_space)  \
  VEC_LOAD4(vec.lo, addr_space)     \
  VEC_LOAD4(vec.hi, addr_space)
#define VEC_LOAD16(vec, addr_space) \
  VEC_LOAD8(vec.lo, addr_space)     \
  VEC_LOAD8(vec.hi, addr_space)
87
/*
 * __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
 *
 * Defines two overloads that read VEC_SIZE halfs from `mem` (a pointer into
 * address space AS) and return them as a value of type TYPE:
 *
 *   vload_half<SUFFIX>(offset, mem)  - starts at mem[offset * VEC_SIZE]
 *   vloada_half<SUFFIX>(offset, mem) - starts at mem[offset * OFFSET_SIZE]
 *
 * The element loads are produced by VEC_LOAD<VEC_SIZE>, which reads through
 * the `mem` and `offset` parameters by name and advances `offset` as it goes,
 * so those two parameter names must not be changed.
 */
#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
  _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, \
                                                 const AS half *mem) { \
    TYPE loaded; \
    offset *= VEC_SIZE; \
    VEC_LOAD##VEC_SIZE(loaded, AS) \
    return loaded; \
  } \
  _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, \
                                                  const AS half *mem) { \
    TYPE loaded; \
    offset *= OFFSET_SIZE; \
    VEC_LOAD##VEC_SIZE(loaded, AS) \
    return loaded; \
  }
101
/*
 * FUNC forwards to __FUNC through one extra level of macro expansion.
 * Operands of ## are not macro-expanded, so going through FUNC lets any
 * arguments that are themselves macros be expanded before __FUNC pastes
 * SUFFIX and VEC_SIZE into identifiers.
 */
#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS)
103
/*
 * Hand "vload_half.inc" to <clc/math/gentype.inc> as __CLC_BODY.
 * NOTE(review): presumably gentype.inc includes __CLC_BODY once per
 * floating-point gentype and vload_half.inc invokes FUNC for each one -
 * confirm against those two files.
 */
#define __CLC_BODY "vload_half.inc"
#include <clc/math/gentype.inc>
#undef __CLC_BODY
/* Remove the helper macros defined earlier in this file. */
#undef FUNC
#undef __FUNC
#undef VEC_LOAD16
#undef VEC_LOAD8
#undef VEC_LOAD4
#undef VEC_LOAD3
#undef VEC_LOAD2
#undef VEC_LOAD1
#undef VLOAD_TYPES
#undef VLOAD_ADDR_SPACES
#undef VLOAD_VECTORIZE
118