;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; main1: builds a <4 x float> and passes it to @llvm.R600.store.swizzle.
; Notation used in the comments below:
;   c0 = the <4 x float> loaded from the null addrspace(8) pointer
;   c1 = element 1 of the [1024 x <4 x float>] addrspace(8) array based at null
;   c2 = element 2 of that same array
; For every output lane i the function computes
;   clamp((c0[i] > 0.0) ? c1[i] : c2[i], 0.0, 1.0)
; so the compare operand and both select operands come from three different
; constant vectors, all at the same lane. The check lines below require a MOV
; from a KC0 operand into a T register somewhere after the main1 label.
; NOTE(review): presumably the MOV is needed because a single instruction
; cannot source three distinct constants directly - confirm against the
; R600 ISA constant-read rules.
; CHECK: {{^}}main1:
; CHECK: MOV * T{{[0-9]+\.[XYZW], KC0}}
define void @main1() {
main_body:
  ; Lane 0: %7 = (c0[0] > 0.0) ? c1[0] : c2[0]   (ogt: false if c0[0] is NaN)
  %0 = load <4 x float>, <4 x float> addrspace(8)* null
  %1 = extractelement <4 x float> %0, i32 0
  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %3 = extractelement <4 x float> %2, i32 0
  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %5 = extractelement <4 x float> %4, i32 0
  %6 = fcmp ogt float %1, 0.000000e+00
  %7 = select i1 %6, float %3, float %5

  ; Lane 1: %15 = (c0[1] > 0.0) ? c1[1] : c2[1]
  %8 = load <4 x float>, <4 x float> addrspace(8)* null
  %9 = extractelement <4 x float> %8, i32 1
  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %11 = extractelement <4 x float> %10, i32 1
  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %13 = extractelement <4 x float> %12, i32 1
  %14 = fcmp ogt float %9, 0.000000e+00
  %15 = select i1 %14, float %11, float %13

  ; Lane 2: %23 = (c0[2] > 0.0) ? c1[2] : c2[2]
  %16 = load <4 x float>, <4 x float> addrspace(8)* null
  %17 = extractelement <4 x float> %16, i32 2
  %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %19 = extractelement <4 x float> %18, i32 2
  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %21 = extractelement <4 x float> %20, i32 2
  %22 = fcmp ogt float %17, 0.000000e+00
  %23 = select i1 %22, float %19, float %21

  ; Lane 3: %31 = (c0[3] > 0.0) ? c1[3] : c2[3]
  %24 = load <4 x float>, <4 x float> addrspace(8)* null
  %25 = extractelement <4 x float> %24, i32 3
  %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %27 = extractelement <4 x float> %26, i32 3
  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %29 = extractelement <4 x float> %28, i32 3
  %30 = fcmp ogt float %25, 0.000000e+00
  %31 = select i1 %30, float %27, float %29

  ; Pass each selected scalar through the clamp intrinsic with 0.0 and 1.0.
  %32 = call float @llvm.AMDGPU.clamp.f32(float %7, float 0.000000e+00, float 1.000000e+00)
  %33 = call float @llvm.AMDGPU.clamp.f32(float %15, float 0.000000e+00, float 1.000000e+00)
  %34 = call float @llvm.AMDGPU.clamp.f32(float %23, float 0.000000e+00, float 1.000000e+00)
  %35 = call float @llvm.AMDGPU.clamp.f32(float %31, float 0.000000e+00, float 1.000000e+00)

  ; Reassemble the four clamped scalars into one vector, lane by lane.
  %36 = insertelement <4 x float> undef, float %32, i32 0
  %37 = insertelement <4 x float> %36, float %33, i32 1
  %38 = insertelement <4 x float> %37, float %34, i32 2
  %39 = insertelement <4 x float> %38, float %35, i32 3

  ; Hand the vector to the store intrinsic; both i32 arguments are 0.
  call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
  ret void
}
; main2: builds a <4 x float> and passes it to @llvm.R600.store.swizzle.
; Notation used in the comments below:
;   c0 = the <4 x float> loaded from the null addrspace(8) pointer
;   c1 = element 1 of the [1024 x <4 x float>] addrspace(8) array based at null
;   c2 = element 2 of that same array
; For every output lane the condition is c0[lane] > 0.0, and BOTH select
; operands are taken from one constant vector, using two neighbouring lanes:
;   lane 0: c1[0] / c1[1]      lane 1: c2[0] / c2[1]
;   lane 2: c1[3] / c1[2]      lane 3: c2[3] / c2[2]
; The check lines below require that no MOV appears after the main2 label.
; NOTE(review): presumably lanes 0/1 and lanes 2/3 of one constant can be read
; together, so no copy into a register is needed - confirm against the R600
; ISA constant-read rules.
; CHECK: {{^}}main2:
; CHECK-NOT: MOV
define void @main2() {
main_body:
  ; Lane 0: %7 = (c0[0] > 0.0) ? c1[0] : c1[1]   (ogt: false if c0[0] is NaN)
  %0 = load <4 x float>, <4 x float> addrspace(8)* null
  %1 = extractelement <4 x float> %0, i32 0
  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %3 = extractelement <4 x float> %2, i32 0
  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %5 = extractelement <4 x float> %4, i32 1
  %6 = fcmp ogt float %1, 0.000000e+00
  %7 = select i1 %6, float %3, float %5

  ; Lane 1: %15 = (c0[1] > 0.0) ? c2[0] : c2[1]
  %8 = load <4 x float>, <4 x float> addrspace(8)* null
  %9 = extractelement <4 x float> %8, i32 1
  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %11 = extractelement <4 x float> %10, i32 0
  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %13 = extractelement <4 x float> %12, i32 1
  %14 = fcmp ogt float %9, 0.000000e+00
  %15 = select i1 %14, float %11, float %13

  ; Lane 2: %23 = (c0[2] > 0.0) ? c1[3] : c1[2]
  %16 = load <4 x float>, <4 x float> addrspace(8)* null
  %17 = extractelement <4 x float> %16, i32 2
  %18 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %19 = extractelement <4 x float> %18, i32 3
  %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %21 = extractelement <4 x float> %20, i32 2
  %22 = fcmp ogt float %17, 0.000000e+00
  %23 = select i1 %22, float %19, float %21

  ; Lane 3: %31 = (c0[3] > 0.0) ? c2[3] : c2[2]
  %24 = load <4 x float>, <4 x float> addrspace(8)* null
  %25 = extractelement <4 x float> %24, i32 3
  %26 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %27 = extractelement <4 x float> %26, i32 3
  %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %29 = extractelement <4 x float> %28, i32 2
  %30 = fcmp ogt float %25, 0.000000e+00
  %31 = select i1 %30, float %27, float %29

  ; Pass each selected scalar through the clamp intrinsic with 0.0 and 1.0.
  %32 = call float @llvm.AMDGPU.clamp.f32(float %7, float 0.000000e+00, float 1.000000e+00)
  %33 = call float @llvm.AMDGPU.clamp.f32(float %15, float 0.000000e+00, float 1.000000e+00)
  %34 = call float @llvm.AMDGPU.clamp.f32(float %23, float 0.000000e+00, float 1.000000e+00)
  %35 = call float @llvm.AMDGPU.clamp.f32(float %31, float 0.000000e+00, float 1.000000e+00)

  ; Reassemble the four clamped scalars into one vector, lane by lane.
  %36 = insertelement <4 x float> undef, float %32, i32 0
  %37 = insertelement <4 x float> %36, float %33, i32 1
  %38 = insertelement <4 x float> %37, float %34, i32 2
  %39 = insertelement <4 x float> %38, float %35, i32 3

  ; Hand the vector to the store intrinsic; both i32 arguments are 0.
  call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
  ret void
}
; Intrinsic declarations.
; clamp: takes three floats and returns a float; marked readnone.
declare float @llvm.AMDGPU.clamp.f32(float, float, float) readnone
; store.swizzle: takes a <4 x float> plus two i32 arguments; returns nothing.
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)