; RUN: llc < %s -march=ptx32 | FileCheck %s

; Load-lowering test for the 32-bit PTX target.
;
; The file declares external arrays of i16/i32/i64/float/double in three
; address spaces (default, addrspace(1), addrspace(4)) and then loads from
; them, or from pointer arguments, in five groups of functions:
;   t1_*  load directly through a pointer argument
;   t2_*  load from element 1 of a pointer argument (constant offset)
;   t3_*  load from a run-time element index
;   t4_*  load element 0 of each external array, per address space
;   t5_*  load element 1 of each default-address-space array
; Each group is repeated for the u16/u32/u64/f32/f64 load variants.
;
; NOTE: FileCheck matches its patterns in file order against the llc output,
; so the declarations and functions must stay in the order given here.

; External array declarations.  The expected output declares every array as
; raw bytes (.b8), sized as 10 elements times the element width.  The
; expected state space is .global for the default address space, .const for
; addrspace(1) and .shared for addrspace(4).

;CHECK: .extern .global .b8 array_i16[20];
@array_i16 = external global [10 x i16]

;CHECK: .extern .const .b8 array_constant_i16[20];
@array_constant_i16 = external addrspace(1) constant [10 x i16]

;CHECK: .extern .shared .b8 array_shared_i16[20];
@array_shared_i16 = external addrspace(4) global [10 x i16]

;CHECK: .extern .global .b8 array_i32[40];
@array_i32 = external global [10 x i32]

;CHECK: .extern .const .b8 array_constant_i32[40];
@array_constant_i32 = external addrspace(1) constant [10 x i32]

;CHECK: .extern .shared .b8 array_shared_i32[40];
@array_shared_i32 = external addrspace(4) global [10 x i32]

;CHECK: .extern .global .b8 array_i64[80];
@array_i64 = external global [10 x i64]

;CHECK: .extern .const .b8 array_constant_i64[80];
@array_constant_i64 = external addrspace(1) constant [10 x i64]

;CHECK: .extern .shared .b8 array_shared_i64[80];
@array_shared_i64 = external addrspace(4) global [10 x i64]

;CHECK: .extern .global .b8 array_float[40];
@array_float = external global [10 x float]

;CHECK: .extern .const .b8 array_constant_float[40];
@array_constant_float = external addrspace(1) constant [10 x float]

;CHECK: .extern .shared .b8 array_shared_float[40];
@array_shared_float = external addrspace(4) global [10 x float]

;CHECK: .extern .global .b8 array_double[80];
@array_double = external global [10 x double]

;CHECK: .extern .const .b8 array_constant_double[80];
@array_constant_double = external addrspace(1) constant [10 x double]

;CHECK: .extern .shared .b8 array_shared_double[80];
@array_shared_double = external addrspace(4) global [10 x double]


; t1: load straight through the pointer argument.  The expected address
; operand is a bare register with no offset.

define ptx_device i16 @t1_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
  %x = load i16* %p
  ret i16 %x
}

define ptx_device i32 @t1_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
  %x = load i32* %p
  ret i32 %x
}

define ptx_device i64 @t1_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
  %x = load i64* %p
  ret i64 %x
}

define ptx_device float @t1_f32(float* %p) {
entry:
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
  %x = load float* %p
  ret float %x
}

define ptx_device double @t1_f64(double* %p) {
entry:
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
  %x = load double* %p
  ret double %x
}

; t2: load element 1 of the pointer argument.  The constant index is expected
; to appear as an immediate byte offset (+2, +4 or +8, i.e. one element
; width) inside the address operand of the load.

define ptx_device i16 @t2_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2];
;CHECK: ret;
  %i = getelementptr i16* %p, i32 1
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t2_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
;CHECK: ret;
  %i = getelementptr i32* %p, i32 1
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t2_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
;CHECK: ret;
  %i = getelementptr i64* %p, i32 1
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t2_f32(float* %p) {
entry:
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
;CHECK: ret;
  %i = getelementptr float* %p, i32 1
  %x = load float* %i
  ret float %x
}

define ptx_device double @t2_f64(double* %p) {
entry:
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
;CHECK: ret;
  %i = getelementptr double* %p, i32 1
  %x = load double* %i
  ret double %x
}

; t3: load from a run-time index %q.  The expected sequence is a left shift
; by 1, 2 or 3 (log2 of the element width), an add that consumes the shifted
; value (captured as R0), and then a load with no immediate offset.

define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
  %i = getelementptr i16* %p, i32 %q
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
  %i = getelementptr i32* %p, i32 %q
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
  %i = getelementptr i64* %p, i32 %q
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t3_f32(float* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
  %i = getelementptr float* %p, i32 %q
  %x = load float* %i
  ret float %x
}

define ptx_device double @t3_f64(double* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
  %i = getelementptr double* %p, i32 %q
  %x = load double* %i
  ret double %x
}

; t4 (global): load element 0 of each default-address-space array.  The
; array's address is expected to be moved into a register (captured as R0)
; which is then used, without offset, as the address of an ld.global.

define ptx_device i16 @t4_global_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t4_global_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t4_global_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t4_global_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
  %x = load float* %i
  ret float %x
}

define ptx_device double @t4_global_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
  %x = load double* %i
  ret double %x
}

; t4 (const): same as above for the addrspace(1) arrays; the load is
; expected to be an ld.const.

define ptx_device i16 @t4_const_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16;
;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
  %x = load i16 addrspace(1)* %i
  ret i16 %x
}

define ptx_device i32 @t4_const_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32;
;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
  %x = load i32 addrspace(1)* %i
  ret i32 %x
}

define ptx_device i64 @t4_const_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64;
;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
  %x = load i64 addrspace(1)* %i
  ret i64 %x
}

define ptx_device float @t4_const_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float;
;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
  %x = load float addrspace(1)* %i
  ret float %x
}

define ptx_device double @t4_const_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double;
;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
  %x = load double addrspace(1)* %i
  ret double %x
}

; t4 (shared): same as above for the addrspace(4) arrays; the load is
; expected to be an ld.shared.

define ptx_device i16 @t4_shared_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
  %x = load i16 addrspace(4)* %i
  ret i16 %x
}

define ptx_device i32 @t4_shared_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
  %x = load i32 addrspace(4)* %i
  ret i32 %x
}

define ptx_device i64 @t4_shared_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
  %x = load i64 addrspace(4)* %i
  ret i64 %x
}

define ptx_device float @t4_shared_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
  %x = load float addrspace(4)* %i
  ret float %x
}

define ptx_device double @t4_shared_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
  %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
  %x = load double addrspace(4)* %i
  ret double %x
}

; t5: load element 1 of each default-address-space array.  The array's
; address is expected in a register (captured as R0) and the one-element
; byte offset (+2, +4 or +8) as an immediate in the load's address operand.

define ptx_device i16 @t5_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2];
;CHECK: ret;
  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
  %x = load i16* %i
  ret i16 %x
}

define ptx_device i32 @t5_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4];
;CHECK: ret;
  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
  %x = load i32* %i
  ret i32 %x
}

define ptx_device i64 @t5_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8];
;CHECK: ret;
  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
  %x = load i64* %i
  ret i64 %x
}

define ptx_device float @t5_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4];
;CHECK: ret;
  %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
  %x = load float* %i
  ret float %x
}

define ptx_device double @t5_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8];
;CHECK: ret;
  %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
  %x = load double* %i
  ret double %x
}