1 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
2 // RUN: -fcuda-is-device -emit-llvm -o - -x hip %s \
3 // RUN: | FileCheck %s
4
5 #include "Inputs/cuda.h"
6
7 // CHECK-LABEL: test_get_workgroup_size
8 // CHECK: call align 4 dereferenceable(64) i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
9 // CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 4
10 // CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
11 // CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 6
12 // CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
13 // CHECK: getelementptr i8, i8 addrspace(4)* %{{.*}}, i32 8
14 // CHECK: load i16, i16 addrspace(4)* %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load
// Device function that exercises the three AMDGPU workgroup-size builtins.
//
// d   - selects which builtin is called: 0 -> x, 1 -> y, 2 -> z.
// out - receives the value returned by the selected builtin; any other
//       value of d stores 0 instead.
//
// The FileCheck directives preceding this function match the emitted
// getelementptr/load pairs sequentially (byte offsets 4, 6, then 8), so the
// cases below must stay in x, y, z order.
__device__ void test_get_workgroup_size(int d, int *out)
{
  switch (d) {
  case 0: *out = __builtin_amdgcn_workgroup_size_x(); break;
  case 1: *out = __builtin_amdgcn_workgroup_size_y(); break;
  case 2: *out = __builtin_amdgcn_workgroup_size_z(); break;
  default: *out = 0; // no builtin call for unrecognised dimensions
  }
}
24
25 // CHECK-DAG: [[$WS_RANGE]] = !{i16 1, i16 1025}
26