/******************************************************************************/
/*                                                                            */
/* Part of the LLVM Project, under the Apache License v2.0 with LLVM          */
/* Exceptions.                                                                */
/* See https://llvm.org/LICENSE.txt for license information.                  */
/* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception                    */
/*                                                                            */
/******************************************************************************/
/*                                                                            */
/*  This file defines GPUJIT.                                                 */
/*                                                                            */
/******************************************************************************/

#ifndef GPUJIT_H_
#define GPUJIT_H_
#include <stddef.h>

/*
 * The following demonstrates how we can use the GPURuntime library to
 * execute a GPU kernel.
 *
 * char KernelString[] = "\n\
 *   .version 1.4\n\
 *   .target sm_10, map_f64_to_f32\n\
 *   .entry _Z8myKernelPi (\n\
 *   .param .u64 __cudaparm__Z8myKernelPi_data)\n\
 *   {\n\
 *     .reg .u16 %rh<4>;\n\
 *     .reg .u32 %r<5>;\n\
 *     .reg .u64 %rd<6>;\n\
 *     cvt.u32.u16     %r1, %tid.x;\n\
 *     mov.u16         %rh1, %ctaid.x;\n\
 *     mov.u16         %rh2, %ntid.x;\n\
 *     mul.wide.u16    %r2, %rh1, %rh2;\n\
 *     add.u32         %r3, %r1, %r2;\n\
 *     ld.param.u64    %rd1, [__cudaparm__Z8myKernelPi_data];\n\
 *     cvt.s64.s32     %rd2, %r3;\n\
 *     mul.wide.s32    %rd3, %r3, 4;\n\
 *     add.u64         %rd4, %rd1, %rd3;\n\
 *     st.global.s32   [%rd4+0], %r3;\n\
 *     exit;\n\
 *   }\n\
 * ";
 *
 * const char *Entry = "_Z8myKernelPi";
 *
 * int main() {
 *   PollyGPUFunction *Kernel;
 *   PollyGPUContext *Context;
 *   PollyGPUDevicePtr *DevArray;
 *   int *HostData;
 *   int MemSize;
 *
 *   int GridX = 8;
 *   int GridY = 8;
 *
 *   int BlockX = 16;
 *   int BlockY = 16;
 *   int BlockZ = 1;
 *
 *   MemSize = 256*64*sizeof(int);
 *   HostData = malloc(MemSize);
 *   Context = polly_initContextCUDA();
 *
 *   // NOTE(review): polly_allocateMemoryForDevice and polly_getDevicePtr are
 *   // not declared in this header; presumably they are provided by the
 *   // runtime implementation -- confirm before copying this example.
 *   DevArray = polly_allocateMemoryForDevice(MemSize);
 *   Kernel = polly_getKernel(KernelString, Entry);
 *
 *   void *Params[1];
 *   void *DevPtr = polly_getDevicePtr(DevArray);
 *   Params[0] = &DevPtr;
 *
 *   polly_launchKernel(Kernel, GridX, GridY, BlockX, BlockY, BlockZ, Params);
 *
 *   polly_copyFromDeviceToHost(DevArray, HostData, MemSize);
 *   polly_freeKernel(Kernel);
 *   polly_freeDeviceMemory(DevArray);
 *   polly_freeContext(Context);
 *   free(HostData);
 * }
 *
 */

/*
 * Selector for the GPU runtime backend. The enumerators keep their implicit
 * values (RUNTIME_NONE = 0, RUNTIME_CUDA = 1, RUNTIME_CL = 2). No prototype in
 * this header takes or returns this type.
 */
typedef enum PollyGPURuntimeT {
  RUNTIME_NONE,
  RUNTIME_CUDA,
  RUNTIME_CL
} PollyGPURuntime;

/*
 * Opaque handles used by the public interface below. The structs are only
 * forward-declared here, so users of this header can hold pointers to them
 * but cannot inspect or allocate them directly.
 */
typedef struct PollyGPUContextT PollyGPUContext;
typedef struct PollyGPUFunctionT PollyGPUFunction;
typedef struct PollyGPUDevicePtrT PollyGPUDevicePtr;

/* Opaque OpenCL-side types; their definitions are not part of this header. */
typedef struct OpenCLContextT OpenCLContext;
typedef struct OpenCLKernelT OpenCLKernel;
typedef struct OpenCLDevicePtrT OpenCLDevicePtr;

/* Opaque CUDA-side types; their definitions are not part of this header. */
typedef struct CUDAContextT CUDAContext;
typedef struct CUDAKernelT CUDAKernel;
typedef struct CUDADevicePtrT CUDADevicePtr;

/*
 * Context creation, one entry point per runtime. Both take no arguments and
 * return a context handle that is later handed to polly_freeContext().
 * NOTE(review): the failure behaviour (NULL return vs. abort) is not visible
 * from this header -- confirm against the implementation.
 */
PollyGPUContext *polly_initContextCUDA(void);
PollyGPUContext *polly_initContextCL(void);

/*
 * Obtain a kernel handle from a kernel image and an entry-point name.
 *
 * BinaryBuffer  Kernel image; the usage example above passes a
 *               NUL-terminated PTX string.
 * KernelName    Name of the entry point inside BinaryBuffer.
 *
 * The returned handle is released with polly_freeKernel().
 */
PollyGPUFunction *polly_getKernel(const char *BinaryBuffer,
                                  const char *KernelName);

/* Release a kernel handle obtained from polly_getKernel(). */
void polly_freeKernel(PollyGPUFunction *Kernel);

/*
 * Copy between host and device memory. Mind the argument order: the
 * host-to-device variant takes the host pointer first, the device-to-host
 * variant takes the device handle first. In the usage example above MemSize
 * is a byte count (it is computed with sizeof).
 */
void polly_copyFromHostToDevice(void *HostData, PollyGPUDevicePtr *DevData,
                                long MemSize);
void polly_copyFromDeviceToHost(PollyGPUDevicePtr *DevData, void *HostData,
                                long MemSize);

/*
 * Takes no arguments and returns nothing; presumably blocks until outstanding
 * device work has finished -- confirm against the implementation.
 */
void polly_synchronizeDevice(void);

/*
 * Launch Kernel on a GridDimX x GridDimY grid of blocks, each block being
 * BlockSizeX x BlockSizeY x BlockSizeZ threads.
 *
 * Parameters is an array with one entry per kernel argument; as in the usage
 * example above, each entry points at the argument value (Params[0] = &DevPtr).
 */
void polly_launchKernel(PollyGPUFunction *Kernel, unsigned int GridDimX,
                        unsigned int GridDimY, unsigned int BlockSizeX,
                        unsigned int BlockSizeY, unsigned int BlockSizeZ,
                        void **Parameters);

/* Release a device allocation handle. */
void polly_freeDeviceMemory(PollyGPUDevicePtr *Allocation);

/* Release a context obtained from one of the polly_initContext* functions. */
void polly_freeContext(PollyGPUContext *Context);

// Note that polly_{malloc/free}Managed are currently not used by Polly.
// We use them in COSMO by replacing all malloc with polly_mallocManaged and all
// frees with cudaFree, so we can get managed memory "automatically".
// Needless to say, this is a hack.
// Please make sure that this code is not present in Polly when 2018 rolls in.
// If this is still present, ping Siddharth Bhat <siddu.druid@gmail.com>
void *polly_mallocManaged(size_t size);
void polly_freeManaged(void *mem);
#endif /* GPUJIT_H_ */