1 /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24 #ifndef __CUDA_BUILTIN_VARS_H 25 #define __CUDA_BUILTIN_VARS_H 26 27 // The file implements built-in CUDA variables using __declspec(property). 28 // https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx 29 // All read accesses of built-in variable fields get converted into calls to a 30 // getter function which in turn would call appropriate builtin to fetch the 31 // value. 32 // 33 // Example: 34 // int x = threadIdx.x; 35 // IR output: 36 // %0 = call i32 @llvm.ptx.read.tid.x() #3 37 // PTX output: 38 // mov.u32 %r2, %tid.x; 39 40 #define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \ 41 __declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \ 42 static inline __attribute__((always_inline)) \ 43 __attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \ 44 return INTRINSIC; \ 45 } 46 47 #if __cplusplus >= 201103L 48 #define __DELETE =delete 49 #else 50 #define __DELETE 51 #endif 52 53 // Make sure nobody can create instances of the special varible types. nvcc 54 // also disallows taking address of special variables, so we disable address-of 55 // operator as well. 56 #define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \ 57 __attribute__((device)) TypeName() __DELETE; \ 58 __attribute__((device)) TypeName(const TypeName &) __DELETE; \ 59 __attribute__((device)) void operator=(const TypeName &) const __DELETE; \ 60 __attribute__((device)) TypeName *operator&() const __DELETE 61 62 struct __cuda_builtin_threadIdx_t { 63 __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_tid_x()); 64 __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_tid_y()); 65 __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_tid_z()); 66 private: 67 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); 68 }; 69 70 struct __cuda_builtin_blockIdx_t { 71 __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ctaid_x()); 72 __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ctaid_y()); 73 __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ctaid_z()); 74 private: 75 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); 76 }; 77 78 struct __cuda_builtin_blockDim_t { 79 __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_ntid_x()); 80 __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_ntid_y()); 81 __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_ntid_z()); 82 private: 83 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); 84 }; 85 86 struct __cuda_builtin_gridDim_t { 87 __CUDA_DEVICE_BUILTIN(x,__builtin_ptx_read_nctaid_x()); 88 __CUDA_DEVICE_BUILTIN(y,__builtin_ptx_read_nctaid_y()); 89 __CUDA_DEVICE_BUILTIN(z,__builtin_ptx_read_nctaid_z()); 90 private: 91 __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); 92 }; 93 94 #define __CUDA_BUILTIN_VAR \ 95 extern const __attribute__((device)) __attribute__((weak)) 96 __CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx; 97 __CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx; 98 __CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim; 99 __CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim; 100 101 // warpSize should translate to read of %WARP_SZ but there's currently no 102 // builtin to do so. According to PTX v4.2 docs 'to date, all target 103 // architectures have a WARP_SZ value of 32'. 104 __attribute__((device)) const int warpSize = 32; 105 106 #undef __CUDA_DEVICE_BUILTIN 107 #undef __CUDA_BUILTIN_VAR 108 #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS 109 110 #endif /* __CUDA_BUILTIN_VARS_H */ 111