; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s

; Check that loads of sub-dword types from the constant address space are
; widened to i32 loads when they are known to be at least 4-byte aligned.

declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

; OPT-LABEL: @constant_load_i1
; OPT: load i1
; OPT-NEXT: store i1
define amdgpu_kernel void @constant_load_i1(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
  %val = load i1, i1 addrspace(4)* %in
  store i1 %val, i1 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i1_align2
; OPT: load i1
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i1_align2(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
  %val = load i1, i1 addrspace(4)* %in, align 2
  store i1 %val, i1 addrspace(1)* %out, align 2
  ret void
}

; OPT-LABEL: @constant_load_i1_align4
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i1_align4(i1 addrspace(1)* %out, i1 addrspace(4)* %in) #0 {
  %val = load i1, i1 addrspace(4)* %in, align 4
  store i1 %val, i1 addrspace(1)* %out, align 4
  ret void
}

; OPT-LABEL: @constant_load_i8
; OPT: load i8
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %val = load i8, i8 addrspace(4)* %in
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i8_align2
; OPT: load i8
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %val = load i8, i8 addrspace(4)* %in, align 2
  store i8 %val, i8 addrspace(1)* %out, align 2
  ret void
}

; OPT-LABEL: @constant_load_i8_align4
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i8_align4(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %val = load i8, i8 addrspace(4)* %in, align 4
  store i8 %val, i8 addrspace(1)* %out, align 4
  ret void
}

; OPT-LABEL: @constant_load_v2i8
; OPT: load <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v2i8_align4
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: bitcast
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_align4(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in, align 4
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out, align 4
  ret void
}

; OPT-LABEL: @constant_load_v3i8
; OPT: bitcast <3 x i8>
; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v3i8_align4
; OPT: bitcast <3 x i8>
; OPT-NEXT: load i32, i32 addrspace(4)
; OPT-NEXT: trunc i32
; OPT-NEXT: bitcast i24
; OPT-NEXT: store <3 x i8>
define amdgpu_kernel void @constant_load_v3i8_align4(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in, align 4
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out, align 4
  ret void
}
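
; Extending loads are widened in the same way when at least 4-byte aligned:
; the original sext/zext is kept and applied to the value truncated from the
; widened i32 load.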
; OPT-LABEL: @constant_load_i16
; OPT: load i16
; OPT: sext
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i16(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: sext
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_i16_align4(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; OPT-LABEL: @constant_load_f16
; OPT: load half
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_f16(half addrspace(1)* %out, half addrspace(4)* %in) #0 {
  %ld = load half, half addrspace(4)* %in
  store half %ld, half addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v2f16
; OPT: load <2 x half>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(4)* %in) #0 {
  %ld = load <2 x half>, <2 x half> addrspace(4)* %in
  store <2 x half> %ld, <2 x half> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @load_volatile
; OPT: load volatile i16
; OPT-NEXT: store
define amdgpu_kernel void @load_volatile(i16 addrspace(1)* %out, i16 addrspace(4)* %in) {
  %a = load volatile i16, i16 addrspace(4)* %in
  store i16 %a, i16 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v2i8_volatile
; OPT: load volatile <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_volatile(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %ld = load volatile <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_v2i8_addrspace1
; OPT: load <2 x i8>
; OPT-NEXT: store
define amdgpu_kernel void @constant_load_v2i8_addrspace1(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; OPT-LABEL: @use_dispatch_ptr
; OPT: bitcast
; OPT-NEXT: load i32
; OPT-NEXT: trunc
; OPT-NEXT: zext
; OPT-NEXT: store
define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 {
  %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
  %val = load i8, i8 addrspace(4)* %dispatch.ptr, align 4
  %ld = zext i8 %val to i32
  store i32 %ld, i32 addrspace(1)* %ptr
  ret void
}

; !range metadata on a widened load must be rewritten in terms of the i32 type.
; OPT-LABEL: @constant_load_i16_align4_range(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !0
define amdgpu_kernel void @constant_load_i16_align4_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !0
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_max(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !0
define amdgpu_kernel void @constant_load_i16_align4_range_max(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !1
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}
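
; A !range with multiple disjoint segments is collapsed to a single
; conservative range on the widened load.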
; OPT-LABEL: @constant_load_i16_align4_complex_range(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !1
define amdgpu_kernel void @constant_load_i16_align4_complex_range(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !2
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_from_0(
; OPT: load i32, i32 addrspace(4)* %1, align 4{{$}}
define amdgpu_kernel void @constant_load_i16_align4_range_from_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !3
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_from_neg(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !2
define amdgpu_kernel void @constant_load_i16_align4_range_from_neg(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !4
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_range_from_neg_to_0(
; OPT: load i32, i32 addrspace(4)* %1, align 4, !range !2
define amdgpu_kernel void @constant_load_i16_align4_range_from_neg_to_0(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !range !5
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @constant_load_i16_align4_invariant
; OPT: load i32, i32 addrspace(4)* %1, align 4, !invariant.load !3
define amdgpu_kernel void @constant_load_i16_align4_invariant(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
  %ld = load i16, i16 addrspace(4)* %in, align 4, !invariant.load !6
  %ext = sext i16 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind }

; OPT: !0 = !{i32 5, i32 0}
; OPT: !1 = !{i32 8, i32 0}
; OPT: !2 = !{i32 65520, i32 0}
; OPT: !3 = !{}

!0 = !{i16 5, i16 500}
!1 = !{i16 5, i16 -1}
!2 = !{i16 8, i16 12, i16 42, i16 99}
!3 = !{i16 0, i16 255}
!4 = !{i16 -16, i16 16}
!5 = !{i16 -16, i16 0}
!6 = !{}