1 // RUN: echo "GPU binary would be here" > %t 2 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - | FileCheck %s 3 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -fcuda-include-gpubinary %t -o - -DNOGLOBALS \ 4 // RUN: | FileCheck %s -check-prefix=NOGLOBALS 5 // RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=NOGPUBIN 6 7 #include "Inputs/cuda.h" 8 9 #ifndef NOGLOBALS 10 // CHECK-DAG: @device_var = internal global i32 11 __device__ int device_var; 12 13 // CHECK-DAG: @constant_var = internal global i32 14 __constant__ int constant_var; 15 16 // CHECK-DAG: @shared_var = internal global i32 17 __shared__ int shared_var; 18 19 // Make sure host globals don't get internalized... 20 // CHECK-DAG: @host_var = global i32 21 int host_var; 22 // ... and that extern vars remain external. 23 // CHECK-DAG: @ext_host_var = external global i32 24 extern int ext_host_var; 25 26 // Shadows for external device-side variables are *definitions* of 27 // those variables. 28 // CHECK-DAG: @ext_device_var = internal global i32 29 extern __device__ int ext_device_var; 30 // CHECK-DAG: @ext_device_var = internal global i32 31 extern __constant__ int ext_constant_var; 32 33 void use_pointers() { 34 int *p; 35 p = &device_var; 36 p = &constant_var; 37 p = &shared_var; 38 p = &host_var; 39 p = &ext_device_var; 40 p = &ext_constant_var; 41 p = &ext_host_var; 42 } 43 44 // Make sure that all parts of GPU code init/cleanup are there: 45 // * constant unnamed string with the kernel name 46 // CHECK: private unnamed_addr constant{{.*}}kernelfunc{{.*}}\00" 47 // * constant unnamed string with GPU binary 48 // CHECK: private unnamed_addr constant{{.*}}\00" 49 // * constant struct that wraps GPU binary 50 // CHECK: @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } 51 // CHECK: { i32 1180844977, i32 1, {{.*}}, i8* null } 52 // * variable to save GPU binary handle after initialization 53 // CHECK: @__cuda_gpubin_handle = internal global i8** null 54 // * Make sure our constructor/destructor was added to global ctor/dtor list. 55 // CHECK: @llvm.global_ctors = appending global {{.*}}@__cuda_module_ctor 56 // CHECK: @llvm.global_dtors = appending global {{.*}}@__cuda_module_dtor 57 58 // Test that we build the correct number of calls to cudaSetupArgument followed 59 // by a call to cudaLaunch. 60 61 // CHECK: define{{.*}}kernelfunc 62 // CHECK: call{{.*}}cudaSetupArgument 63 // CHECK: call{{.*}}cudaSetupArgument 64 // CHECK: call{{.*}}cudaSetupArgument 65 // CHECK: call{{.*}}cudaLaunch 66 __global__ void kernelfunc(int i, int j, int k) {} 67 68 // Test that we've built correct kernel launch sequence. 69 // CHECK: define{{.*}}hostfunc 70 // CHECK: call{{.*}}cudaConfigureCall 71 // CHECK: call{{.*}}kernelfunc 72 void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); } 73 #endif 74 75 // Test that we've built a function to register kernels and global vars. 76 // CHECK: define internal void @__cuda_register_globals 77 // CHECK: call{{.*}}cudaRegisterFunction(i8** %0, {{.*}}kernelfunc 78 // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}device_var{{.*}}i32 0, i32 4, i32 0, i32 0 79 // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}constant_var{{.*}}i32 0, i32 4, i32 1, i32 0 80 // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_device_var{{.*}}i32 1, i32 4, i32 0, i32 0 81 // CHECK-DAG: call{{.*}}cudaRegisterVar(i8** %0, {{.*}}ext_constant_var{{.*}}i32 1, i32 4, i32 1, i32 0 82 // CHECK: ret void 83 84 // Test that we've built contructor.. 85 // CHECK: define internal void @__cuda_module_ctor 86 // .. that calls __cudaRegisterFatBinary(&__cuda_fatbin_wrapper) 87 // CHECK: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper 88 // .. stores return value in __cuda_gpubin_handle 89 // CHECK-NEXT: store{{.*}}__cuda_gpubin_handle 90 // .. and then calls __cuda_register_globals 91 // CHECK-NEXT: call void @__cuda_register_globals 92 93 // Test that we've created destructor. 94 // CHECK: define internal void @__cuda_module_dtor 95 // CHECK: load{{.*}}__cuda_gpubin_handle 96 // CHECK-NEXT: call void @__cudaUnregisterFatBinary 97 98 // There should be no __cuda_register_globals if we have no 99 // device-side globals, but we still need to register GPU binary. 100 // Skip GPU binary string first. 101 // NOGLOBALS: @0 = private unnamed_addr constant{{.*}} 102 // NOGLOBALS-NOT: define internal void @__cuda_register_globals 103 // NOGLOBALS: define internal void @__cuda_module_ctor 104 // NOGLOBALS: call{{.*}}cudaRegisterFatBinary{{.*}}__cuda_fatbin_wrapper 105 // NOGLOBALS-NOT: call void @__cuda_register_globals 106 // NOGLOBALS: define internal void @__cuda_module_dtor 107 // NOGLOBALS: call void @__cudaUnregisterFatBinary 108 109 // There should be no constructors/destructors if we have no GPU binary. 110 // NOGPUBIN-NOT: define internal void @__cuda_register_globals 111 // NOGPUBIN-NOT: define internal void @__cuda_module_ctor 112 // NOGPUBIN-NOT: define internal void @__cuda_module_dtor 113