; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s

; REQUIRES: pollyacc

; Purpose: check the host and kernel code that is dumped by
; -polly-acc-dump-code for a function containing two consecutive loop nests
; that both update the same array. The C equivalent of the IR below is:
;
;    void foo(float A[]) {
;      for (long i = 0; i < 128; i++)
;        A[i] += i;
;
;      for (long i = 0; i < 128; i++)
;        for (long j = 0; j < 128; j++)
;          A[42] += i + j;
;    }

; Expected host code, part 1: A is copied to the device and the first loop is
; launched as kernel0 on a grid of 4 blocks with 32 threads each.
; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (128) * sizeof(float), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT:   dim3 k0_dimBlock(32);
; CODE-NEXT:   dim3 k0_dimGrid(4);
; CODE-NEXT:   kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT:   cudaCheckKernel();
; CODE-NEXT: }

; Expected host code, part 2: the second loop nest is launched as kernel1 with
; dim3 values that carry no explicit dimensions.
; CODE: {
; CODE-NEXT:   dim3 k1_dimBlock;
; CODE-NEXT:   dim3 k1_dimGrid;
; CODE-NEXT:   kernel1 <<<k1_dimGrid, k1_dimBlock>>> (dev_MemRef_A);
; CODE-NEXT:   cudaCheckKernel();
; CODE-NEXT: }

; Expected host code, part 3: A is copied back to the host and the device
; buffer is released.
; CODE: cudaCheckReturn(cudaMemcpy(MemRef_A, dev_MemRef_A, (128) * sizeof(float), cudaMemcpyDeviceToHost));
; CODE-NEXT: cudaCheckReturn(cudaFree(dev_MemRef_A));
; CODE-NEXT: }

; Expected body of kernel0: one instance of the first statement per thread,
; indexed by block id b0 and thread id t0.
; CODE: # kernel0
; CODE-NEXT: Stmt_bb4(32 * b0 + t0);

; Expected body of kernel1: both loops are kept as loops inside the kernel.
; NOTE(review): presumably because every iteration updates the single element
; A[42] - confirm against the dependence analysis.
; CODE: # kernel1
; CODE-NEXT: for (int c0 = 0; c0 <= 127; c0 += 1)
; CODE-NEXT:   for (int c1 = 0; c1 <= 127; c1 += 1)
; CODE-NEXT:     Stmt_bb14(c0, c1);


target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @foo(float* %A) {
bb:
  br label %bb3

; First loop header: i runs from 0 up to (not including) 128.
bb3:                                              ; preds = %bb8, %bb
  %i.0 = phi i64 [ 0, %bb ], [ %tmp9, %bb8 ]
  %exitcond2 = icmp ne i64 %i.0, 128
  br i1 %exitcond2, label %bb4, label %bb10

; First loop body: A[i] += (float) i.
bb4:                                              ; preds = %bb3
  %tmp = sitofp i64 %i.0 to float
  %tmp5 = getelementptr inbounds float, float* %A, i64 %i.0
  %tmp6 = load float, float* %tmp5, align 4
  %tmp7 = fadd float %tmp6, %tmp
  store float %tmp7, float* %tmp5, align 4
  br label %bb8

bb8:                                              ; preds = %bb4
  %tmp9 = add nuw nsw i64 %i.0, 1
  br label %bb3

bb10:                                             ; preds = %bb3
  br label %bb11

; Outer header of the second loop nest: i runs from 0 up to (not including) 128.
bb11:                                             ; preds = %bb23, %bb10
  %i1.0 = phi i64 [ 0, %bb10 ], [ %tmp24, %bb23 ]
  %exitcond1 = icmp ne i64 %i1.0, 128
  br i1 %exitcond1, label %bb12, label %bb25

bb12:                                             ; preds = %bb11
  br label %bb13

; Inner header of the second loop nest: j runs from 0 up to (not including) 128.
bb13:                                             ; preds = %bb20, %bb12
  %j.0 = phi i64 [ 0, %bb12 ], [ %tmp21, %bb20 ]
  %exitcond = icmp ne i64 %j.0, 128
  br i1 %exitcond, label %bb14, label %bb22

; Second nest body: A[42] += (float) (i + j).
bb14:                                             ; preds = %bb13
  %tmp15 = add nuw nsw i64 %i1.0, %j.0
  %tmp16 = sitofp i64 %tmp15 to float
  %tmp17 = getelementptr inbounds float, float* %A, i64 42
  %tmp18 = load float, float* %tmp17, align 4
  %tmp19 = fadd float %tmp18, %tmp16
  store float %tmp19, float* %tmp17, align 4
  br label %bb20

bb20:                                             ; preds = %bb14
  %tmp21 = add nuw nsw i64 %j.0, 1
  br label %bb13

bb22:                                             ; preds = %bb13
  br label %bb23

bb23:                                             ; preds = %bb22
  %tmp24 = add nuw nsw i64 %i1.0, 1
  br label %bb11

bb25:                                             ; preds = %bb11
  ret void
}