; Check the host-side code that is printed for the detected kernel.
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s

; This pipeline is wrapped in `not`: it succeeds only if the kernel IR dump
; does NOT match the pattern below, i.e. no kernel IR was emitted.
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-kernel-ir \
; RUN: -disable-output < %s | \
; RUN: not FileCheck %s -check-prefix=KERNEL-IR

; Check the final host IR that is produced after bailing out.
; RUN: opt %loadPolly -polly-codegen-ppcg -S < %s | \
; RUN: FileCheck %s -check-prefix=IR

; REQUIRES: pollyacc
;
;    void foo(long A[1024], long B[1024]) {
;      for (long i = 0; i < 1024; i++)
;        A[i] += (B[i] + (long)&B[i]);
;    }

; This kernel loads/stores a pointer address we model. This is a rare case,
; where we still lack proper code-generation support. We check here that we
; detect the invalid IR and bail out gracefully.

; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (1024) * sizeof(i64), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT:   dim3 k0_dimBlock(32);
; CODE-NEXT:   dim3 k0_dimGrid(32);
; CODE-NEXT:   kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_B, dev_MemRef_A);
; CODE-NEXT:   cudaCheckKernel();
; CODE-NEXT: }

; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (1024) * sizeof(i64), cudaMemcpyDeviceToHost));

; CODE: # kernel0
; CODE-NEXT: Stmt_bb2(32 * b0 + t0);

; Pattern used by the negated pipeline above; it must not be found.
; KERNEL-IR: kernel

; The guard in front of %polly.start is the constant false, so control always
; continues to the original loop header %bb1.
; IR: br i1 false, label %polly.start, label %bb1

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; IR form of the C function shown above: for each i in [0, 1024) it adds
; B[i] plus the integer value of the address &B[i] to A[i].
define void @foo(i64* %A, i64* %B) {
bb:
  br label %bb1

; Loop header: %i.0 starts at 0 and the loop is left once it reaches 1024.
bb1:                                              ; preds = %bb10, %bb
  %i.0 = phi i64 [ 0, %bb ], [ %tmp11, %bb10 ]
  %exitcond = icmp ne i64 %i.0, 1024
  br i1 %exitcond, label %bb2, label %bb12

; Loop body (the statement referred to as Stmt_bb2 in the checks above).
bb2:                                              ; preds = %bb1
  %tmp = getelementptr inbounds i64, i64* %B, i64 %i.0
  %tmp3 = load i64, i64* %tmp, align 8
  %tmp4 = getelementptr inbounds i64, i64* %B, i64 %i.0
  ; The address of B[i] itself is used as an integer operand; this is the
  ; pointer value the test description above refers to.
  %tmp5 = ptrtoint i64* %tmp4 to i64
  %tmp6 = add nsw i64 %tmp3, %tmp5
  %tmp7 = getelementptr inbounds i64, i64* %A, i64 %i.0
  %tmp8 = load i64, i64* %tmp7, align 8
  %tmp9 = add nsw i64 %tmp8, %tmp6
  store i64 %tmp9, i64* %tmp7, align 8
  br label %bb10

; Loop latch: increment the induction variable and return to the header.
bb10:                                             ; preds = %bb2
  %tmp11 = add nuw nsw i64 %i.0, 1
  br label %bb1

bb12:                                             ; preds = %bb1
  ret void
}