1#include <clc/clc.h>
2
// Fallback entry point for s_waitcnt, used when the compiler does not offer
// __builtin_amdgcn_s_waitcnt. `flags` is the wait-count value to pass on.
void __clc_amdgcn_s_waitcnt(unsigned int flags);
4
// s_waitcnt takes a 16-bit argument that packs, per counter, the maximum
// number of operations allowed to remain pending:
// [12:8] LGKM -- LDS, GDS, Konstant (SMRD), Messages
// [7]    -- undefined
// [6:4]  -- exports, GDS, and mem write
// [3:0]  -- vector memory operations
11
// Pick the s_waitcnt entry point behind the __waitcnt(mask) macro.
// Clang 5 and later provide __builtin_amdgcn_s_waitcnt. Anything older (or a
// compiler that does not define __clang_major__ at all) falls back to a
// declaration bound by an asm label to the symbol "llvm.amdgcn.s.waitcnt".
#if __clang_major__ < 5
_CLC_DEF void __clc_amdgcn_s_waitcnt(unsigned) __asm("llvm.amdgcn.s.waitcnt");
#  define __waitcnt(mask) __clc_amdgcn_s_waitcnt(mask)
#else
#  define __waitcnt(mask) __builtin_amdgcn_s_waitcnt(mask)
#endif
19
// Issues s_waitcnt (through the __waitcnt macro) with a wait-count value
// chosen from `flags`.
//
// flags: bitmask tested against CLK_GLOBAL_MEM_FENCE and CLK_LOCAL_MEM_FENCE.
//        The global bit takes precedence when both are set; when neither is
//        set, no wait is issued.
_CLC_DEF _CLC_OVERLOAD void mem_fence(cl_mem_fence_flags flags) {
  // Enum constants keep the arguments integer constant expressions.
  enum {
    // Every counter field is zero: nothing may remain pending.
    WAITCNT_ALL = 0,
    // Bits [7:0] are all ones and the LGKM field [12:8] is zero, so only
    // the LGKM counter has to drain.
    WAITCNT_LGKM_ONLY = 0xff
  };

  if (flags & CLK_GLOBAL_MEM_FENCE) {
    // Scalar loads are counted under LGKM, and we cannot tell whether the
    // compiler turned any loads into scalar ones, so wait on every counter.
    __waitcnt(WAITCNT_ALL);
    return;
  }

  if (flags & CLK_LOCAL_MEM_FENCE) {
    __waitcnt(WAITCNT_LGKM_ONLY);
  }
}
28#undef __waitcnt
29
// We don't have a separate mechanism for read and write fences
// Read fence: forwards `flags` unchanged to mem_fence(), so it behaves
// exactly like the full fence.
_CLC_DEF _CLC_OVERLOAD void read_mem_fence(cl_mem_fence_flags flags) {
  // No read-only variant is implemented here; reuse the full fence.
  mem_fence(flags);
}
34
// Write fence: forwards `flags` unchanged to mem_fence(), so it behaves
// exactly like the full fence.
_CLC_DEF _CLC_OVERLOAD void write_mem_fence(cl_mem_fence_flags flags) {
  // No write-only variant is implemented here; reuse the full fence.
  mem_fence(flags);
}
38