; Verifies correctness of load/store of parameters and return values.
; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s

; The test functions below tail-call themselves with their own argument, so a
; single function covers the load of the incoming parameter, the store of the
; outgoing call argument (param0), the load of the call result (retval0) and
; the store of the function's own return value (func_retval0).

; Single-field wrapper structs, one per scalar type.
%s_i1 = type { i1 }
%s_i8 = type { i8 }
%s_i16 = type { i16 }
%s_f16 = type { half }
%s_i32 = type { i32 }
%s_f32 = type { float }
%s_i64 = type { i64 }
%s_f64 = type { double }

; More complicated types. i64 is used to increase natural alignment
; requirement for the type.
%s_i32x4 = type { i32, i32, i32, i32, i64}
%s_i32f32 = type { i32, float, i32, float, i64}
%s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
; Same field list as %s_i8i32x4, declared as a packed struct.
%s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
%s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}
; All scalar parameters must be at least 32 bits in size.
; i1 is loaded/stored as i8.

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i1(
; CHECK-NEXT: .param .b32 test_i1_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
; CHECK: setp.eq.b16 %p1, [[A]], 1
; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
; CHECK: and.b32 [[C:%r[0-9]+]], [[B]], 1;
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[C]]
; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK-NEXT: test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R8]], 1;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK: ret;
define i1 @test_i1(i1 %a) {
  %r = tail call i1 @test_i1(i1 %a);
  ret i1 %r;
}

; Signed i1 is a somewhat special case. We only care about one bit and
; then use neg.s32 to convert it to 32-bit -1 if it's set.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i1s(
; CHECK-NEXT: .param .b32 test_i1s_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
; CHECK: neg.s32 [[R:%r[0-9]+]], [[R1]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define signext i1 @test_i1s(i1 signext %a) {
  %r = tail call signext i1 @test_i1s(i1 signext %a);
  ret i1 %r;
}

; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
; For three elements the expected split is one v2 access at offset 0 plus one
; scalar access at offset 2.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v3i1(
; CHECK-NEXT: .param .align 4 .b8 test_v3i1_param_0[4]
; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i1_param_0]
; CHECK: .param .align 4 .b8 param0[4];
; CHECK-DAG: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i1,
; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]}
; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i1> @test_v3i1(<3 x i1> %a) {
  %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
  ret <3 x i1> %r;
}

; Four i1 elements are expected to move as a single v4 byte access.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v4i1(
; CHECK-NEXT: .param .align 4 .b8 test_v4i1_param_0[4]
; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i1_param_0]
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK: test_v4i1,
; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]};
; CHECK-NEXT: ret;
define <4 x i1> @test_v4i1(<4 x i1> %a) {
  %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
  ret <4 x i1> %r;
}

; Five i1 elements: one v4 access at offset 0 plus a scalar access at offset 4.
; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v5i1(
; CHECK-NEXT: .param .align 8 .b8 test_v5i1_param_0[8]
; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i1_param_0]
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i1,
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i1> @test_v5i1(<5 x i1> %a) {
  %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
  ret <5 x i1> %r;
}

; Unsigned i8 is loaded into a 16-bit register, widened to 32 bits and masked
; to its low 8 bits.
; The call result is masked with 255 again before it is stored as the 32-bit
; return value.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i8(
; CHECK-NEXT: .param .b32 test_i8_param_0
; CHECK: ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: and.b32 [[A:%r[0-9]+]], [[A32]], 255;
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[R32]], 255;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i8 @test_i8(i8 %a) {
  %r = tail call i8 @test_i8(i8 %a);
  ret i8 %r;
}

; signed i8 is loaded into 16-bit register which is then sign-extended to i32.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i8s(
; CHECK-NEXT: .param .b32 test_i8s_param_0
; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[A]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK: test_i8s,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0+0];
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
; CHECK: cvt.u16.u32 [[R16:%rs[0-9]+]], [[R32]];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[R16]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define signext i8 @test_i8s(i8 signext %a) {
  %r = tail call signext i8 @test_i8s(i8 signext %a);
  ret i8 %r;
}

; Three i8 elements: one v2 access at offset 0 plus a scalar access at offset 2.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v3i8(
; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
; CHECK-DAG: ld.param.u8 [[E2:%rs[0-9]+]], [test_v3i8_param_0+2];
; CHECK-DAG: ld.param.v2.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.v2.b8 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b8 [param0+2], [[E2]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i8,
; CHECK-DAG: ld.param.v2.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
; CHECK-DAG: st.param.v2.b8 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b8 [func_retval0+2], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i8> @test_v3i8(<3 x i8> %a) {
  %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
  ret <3 x i8> %r;
}

; Four i8 elements move as a single v4 access.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v4i8(
; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
; CHECK: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0]
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i8,
; CHECK: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
define <4 x i8> @test_v4i8(<4 x i8> %a) {
  %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
  ret <4 x i8> %r;
}

; Five i8 elements: one v4 access at offset 0 plus a scalar access at offset 4.
; The v4 load line used to lack the colon after the directive name, so it was
; never matched and E0..E3 silently came from the previous function.
; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v5i8(
; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
; CHECK-DAG: ld.param.u8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
; CHECK-DAG: ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v4.b8 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i8,
; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i8> @test_v5i8(<5 x i8> %a) {
  %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
  ret <5 x i8> %r;
}

; Unsigned i16 is widened to 32 bits for the call and the call result is
; masked back to 16 bits.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i16(
; CHECK-NEXT: .param .b32 test_i16_param_0
; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16_param_0];
; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
; CHECK: and.b32 [[R:%r[0-9]+]], [[RE32]], 65535;
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i16 @test_i16(i16 %a) {
  %r = tail call i16 @test_i16(i16 %a);
  ret i16 %r;
}

; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i16s(
; CHECK-NEXT: .param .b32 test_i16s_param_0
; CHECK: ld.param.u16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[E32]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i16s,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0+0];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define signext i16 @test_i16s(i16 signext %a) {
  %r = tail call signext i16 @test_i16s(i16 signext %a);
  ret i16 %r;
}

; Three i16 elements: one v2 access at offset 0 plus a scalar access at offset 4.
; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3i16(
; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
; CHECK-DAG: ld.param.u16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
; CHECK-DAG: ld.param.v2.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i16,
; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i16> @test_v3i16(<3 x i16> %a) {
  %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
  ret <3 x i16> %r;
}

; Four i16 elements move as a single v4 access.
; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v4i16(
; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
; CHECK: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i16_param_0]
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i16,
; CHECK: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-NEXT: ret;
define <4 x i16> @test_v4i16(<4 x i16> %a) {
  %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
  ret <4 x i16> %r;
}

; Five i16 elements: one v4 access at offset 0 plus a scalar access at offset 8.
; The v4 load line used to lack the colon after the directive name, so it was
; never matched and E0..E3 silently came from the previous function.
; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5i16(
; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
; CHECK-DAG: ld.param.u16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
; CHECK-DAG: ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
; CHECK: .param .align 16 .b8 param0[16];
; CHECK-DAG: st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i16,
; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i16> @test_v5i16(<5 x i16> %a) {
  %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
  ret <5 x i16> %r;
}

; A scalar half is declared as a .b32 parameter but accessed with 16-bit
; loads and stores.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_f16(
; CHECK-NEXT: .param .b32 test_f16_param_0
; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b16 [param0+0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f16,
; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
; CHECK: st.param.b16 [func_retval0+0], [[R]]
; CHECK-NEXT: ret;
define half @test_f16(half %a) {
  %r = tail call half @test_f16(half %a);
  ret half %r;
}

; <2 x half> travels as one 32-bit value in a %hh register.
; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_v2f16(
; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
; CHECK: st.param.b32 [param0+0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v2f16,
; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], [[R]]
; CHECK-NEXT: ret;
define <2 x half> @test_v2f16(<2 x half> %a) {
  %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
  ret <2 x half> %r;
}

; CHECK:.func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3f16(
; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0];
; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v3f16,
; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4];
; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
; CHECK: ret;
define <3 x half> @test_v3f16(<3 x half> %a) {
  %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
  ret <3 x half> %r;
}

; CHECK:.func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v4f16(
; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]};
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK: test_v4f16,
; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0];
; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
; CHECK: ret;
define <4 x half> @test_v4f16(<4 x half> %a) {
  %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
  ret <4 x half> %r;
}

; CHECK:.func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5f16(
; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0];
; NOTE(review): the mov.b32 line below reads HH01, which no line of this
; function defines, so it presumably still holds the register captured in an
; earlier function; it also captures E0/E1 a second time. Confirm against the
; real llc output before tightening or removing it.
; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK-DAG: st.param.v4.b16 [param0+0],
; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v5f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
; CHECK: ret;
define <5 x half> @test_v5f16(<5 x half> %a) {
  %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
  ret <5 x half> %r;
}

; CHECK:.func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v8f16(
; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]];
; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]];
; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]];
; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK: test_v8f16,
; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
; CHECK: ret;
define <8 x half> @test_v8f16(<8 x half> %a) {
  %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
  ret <8 x half> %r;
}

; CHECK:.func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v9f16(
; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0];
; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8];
; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16];
; CHECK: .param .align 32 .b8 param0[32];
; CHECK-DAG: st.param.v4.b16 [param0+0],
; CHECK-DAG: st.param.v4.b16 [param0+8],
; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK: test_v9f16,
; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8];
; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
; CHECK: ret;
define <9 x half> @test_v9f16(<9 x half> %a) {
  %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
  ret <9 x half> %r;
}

; i32 needs no widening or masking on either side of the call.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_i32(
; CHECK-NEXT: .param .b32 test_i32_param_0
; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_i32_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0+0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i32 @test_i32(i32 %a) {
  %r = tail call i32 @test_i32(i32 %a);
  ret i32 %r;
}

; Three i32 elements: one v2 access at offset 0 plus a scalar access at offset 8.
; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v3i32(
; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b32 [param0+8], [[E2]];
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i32,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i32> @test_v3i32(<3 x i32> %a) {
  %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
  ret <3 x i32> %r;
}

; CHECK: .func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v4i32(
; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
; CHECK: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
; CHECK: .param .align 16 .b8 param0[16];
; CHECK: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: .param .align 16 .b8 retval0[16];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i32,
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
; CHECK: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; The directive below was misspelled and therefore never checked; it now
; requires ret to follow the return-value store, as in the sibling tests.
; CHECK-NEXT: ret;
define <4 x i32> @test_v4i32(<4 x i32> %a) {
  %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
  ret <4 x i32> %r;
}

; Five i32 elements: one v4 access at offset 0 plus a scalar access at offset 16.
; The v4 load line used to lack the colon after the directive name, so it was
; never matched and E0..E3 silently came from the previous function.
; CHECK: .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v5i32(
; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
; CHECK-DAG: ld.param.u32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
; CHECK-DAG: ld.param.v4.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
; CHECK: .param .align 32 .b8 param0[32];
; CHECK-DAG: st.param.v4.b32 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v5i32,
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v4.b32 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
; CHECK-DAG: st.param.b32 [func_retval0+16], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i32> @test_v5i32(<5 x i32> %a) {
  %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
  ret <5 x i32> %r;
}

; float uses the typed .f32 accesses on both sides of the call.
; CHECK: .func (.param .b32 func_retval0)
; CHECK-LABEL: test_f32(
; CHECK-NEXT: .param .b32 test_f32_param_0
; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_f32_param_0];
; CHECK: .param .b32 param0;
; CHECK: st.param.f32 [param0+0], [[E]];
; CHECK: .param .b32 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_f32,
; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
; CHECK: st.param.f32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define float @test_f32(float %a) {
  %r = tail call float @test_f32(float %a);
  ret float %r;
}

; i64 is passed through a .b64 parameter and a %rd register.
; CHECK: .func (.param .b64 func_retval0)
; CHECK-LABEL: test_i64(
; CHECK-NEXT: .param .b64 test_i64_param_0
; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_i64_param_0];
; CHECK: .param .b64 param0;
; CHECK: st.param.b64 [param0+0], [[E]];
; CHECK: .param .b64 retval0;
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define i64 @test_i64(i64 %a) {
  %r = tail call i64 @test_i64(i64 %a);
  ret i64 %r;
}

; Three i64 elements: one v2 access at offset 0 plus a scalar access at
; offset 16. The two return-value store lines were listed twice verbatim; the
; redundant copies are dropped.
; CHECK: .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v3i64(
; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
; CHECK-DAG: ld.param.u64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
; CHECK: .param .align 32 .b8 param0[32];
; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.b64 [param0+16], [[E2]];
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v3i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE2]];
; CHECK-NEXT: ret;
define <3 x i64> @test_v3i64(<3 x i64> %a) {
  %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
  ret <3 x i64> %r;
}

; For i64 vector loads are limited by PTX to 2 elements.
; CHECK: .func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v4i64(
; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
; CHECK-DAG: ld.param.v2.u64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
; CHECK-DAG: ld.param.v2.u64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
; CHECK: .param .align 32 .b8 param0[32];
; CHECK: st.param.v2.b64 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_v4i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
; CHECK-DAG: st.param.v2.b64 [func_retval0+16], {[[RE2]], [[RE3]]};
; CHECK-DAG: st.param.v2.b64 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-NEXT: ret;
define <4 x i64> @test_v4i64(<4 x i64> %a) {
  %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
  ret <4 x i64> %r;
}

; Aggregates, on the other hand, do not get extended.

; The parameter declaration line now spells out .param like the sibling
; single-field struct tests do.
; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_s_i1(
; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1]
; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0+0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni
; CHECK-NEXT: test_s_i1,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
; CHECK: st.param.b8 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_i1 @test_s_i1(%s_i1 %a) {
  %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
  ret %s_i1 %r;
}

; CHECK: .func (.param .align 1 .b8 func_retval0[1])
; CHECK-LABEL: test_s_i8(
; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
; CHECK: ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
; CHECK: .param .align 1 .b8 param0[1];
; CHECK: st.param.b8 [param0+0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni
; CHECK-NEXT: test_s_i8,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0+0];
; CHECK: st.param.b8 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_i8 @test_s_i8(%s_i8 %a) {
  %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
  ret %s_i8 %r;
}

; CHECK: .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_s_i16(
; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
; CHECK: ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0+0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni
; CHECK-NEXT: test_s_i16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_i16 @test_s_i16(%s_i16 %a) {
  %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
  ret %s_i16 %r;
}

; CHECK: .func (.param .align 2 .b8 func_retval0[2])
; CHECK-LABEL: test_s_f16(
; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
; CHECK: st.param.b16 [param0+0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
; CHECK: call.uni
; CHECK-NEXT: test_s_f16,
; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0];
; CHECK: st.param.b16 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_f16 @test_s_f16(%s_f16 %a) {
  %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
  ret %s_f16 %r;
}

; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_s_i32(
; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
; CHECK: ld.param.u32 [[E:%r[0-9]+]], [test_s_i32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.b32 [param0+0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0];
; CHECK: st.param.b32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_i32 @test_s_i32(%s_i32 %a) {
  %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
  ret %s_i32 %r;
}

; CHECK: .func (.param .align 4 .b8 func_retval0[4])
; CHECK-LABEL: test_s_f32(
; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
; CHECK: ld.param.f32 [[E:%f[0-9]+]], [test_s_f32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
; CHECK: st.param.f32 [param0+0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_f32,
; CHECK: ld.param.f32 [[R:%f[0-9]+]], [retval0+0];
; CHECK: st.param.f32 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_f32 @test_s_f32(%s_f32 %a) {
  %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
  ret %s_f32 %r;
}

; CHECK: .func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_s_i64(
; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
; CHECK: ld.param.u64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
; CHECK: .param .align 8 .b8 param0[8];
; CHECK: st.param.b64 [param0+0], [[E]];
; CHECK: .param .align 8 .b8 retval0[8];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0+0];
; CHECK: st.param.b64 [func_retval0+0], [[R]];
; CHECK-NEXT: ret;
define %s_i64 @test_s_i64(%s_i64 %a) {
  %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
  ret %s_i64 %r;
}

; Fields that have different types, but identical sizes are not vectorized.
; CHECK: .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i32f32(
; CHECK: .param .align 8 .b8 test_s_i32f32_param_0[24]
; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
; CHECK-DAG: ld.param.f32 [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
; CHECK-DAG: ld.param.u32 [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
; CHECK-DAG: ld.param.f32 [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
; CHECK-DAG: ld.param.u32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
; CHECK: .param .align 8 .b8 param0[24];
; CHECK-DAG: st.param.b32 [param0+0], [[E0]];
; CHECK-DAG: st.param.f32 [param0+4], [[E1]];
; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
; CHECK-DAG: st.param.f32 [param0+12], [[E3]];
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32f32,
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0+0];
; CHECK-DAG: ld.param.f32 [[RE1:%f[0-9]+]], [retval0+4];
; CHECK-DAG: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
; CHECK-DAG: ld.param.f32 [[RE3:%f[0-9]+]], [retval0+12];
; CHECK-DAG: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.b32 [func_retval0+0], [[RE0]];
; CHECK-DAG: st.param.f32 [func_retval0+4], [[RE1]];
; CHECK-DAG: st.param.b32 [func_retval0+8], [[RE2]];
; CHECK-DAG: st.param.f32 [func_retval0+12], [[RE3]];
; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
; CHECK: ret;
define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
  %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
  ret %s_i32f32 %r;
}

; We do vectorize consecutive fields with matching types.
; The i64 field load is captured as E4 so that the store of that field is
; checked against the register loaded in this function, not against a value
; left over from the previous test.
; CHECK:.visible .func (.param .align 8 .b8 func_retval0[24])
; CHECK-LABEL: test_s_i32x4(
; CHECK: .param .align 8 .b8 test_s_i32x4_param_0[24]
; CHECK-DAG: ld.param.u64 [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
; CHECK-DAG: ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
; CHECK-DAG: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
; CHECK: .param .align 8 .b8 param0[24];
; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
; CHECK: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0),
; CHECK-NEXT: test_s_i32x4,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
; CHECK: ld.param.b64 [[RE4:%rd[0-9]+]], [retval0+16];
; CHECK-DAG: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
; CHECK-DAG: st.param.b64 [func_retval0+16], [[RE4]];
; CHECK: ret;

define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
  %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
  ret %s_i32x4 %r;
}

; CHECK:.visible .func (.param .align 8 .b8 func_retval0[32])
; CHECK-LABEL: test_s_i1i32x4(
; CHECK: .param .align 8 .b8 test_s_i1i32x4_param_0[32]
; CHECK: ld.param.u64 [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
; CHECK: ld.param.u32 [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
; CHECK: ld.param.u32 [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
; CHECK: ld.param.u8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
814; CHECK: ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0]; 815; CHECK: .param .align 8 .b8 param0[32]; 816; CHECK: st.param.v2.b32 [param0+0], {[[E0]], [[E1]]}; 817; CHECK: st.param.b8 [param0+8], [[E2]]; 818; CHECK: st.param.b32 [param0+12], [[E3]]; 819; CHECK: st.param.b32 [param0+16], [[E4]]; 820; CHECK: st.param.b64 [param0+24], [[E5]]; 821; CHECK: .param .align 8 .b8 retval0[32]; 822; CHECK: call.uni (retval0), 823; CHECK: test_s_i1i32x4, 824; CHECK: ( 825; CHECK: param0 826; CHECK: ); 827; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0]; 828; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8]; 829; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12]; 830; CHECK: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16]; 831; CHECK: ld.param.b64 [[RE5:%rd[0-9]+]], [retval0+24]; 832; CHECK: st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]}; 833; CHECK: st.param.b8 [func_retval0+8], [[RE2]]; 834; CHECK: st.param.b32 [func_retval0+12], [[RE3]]; 835; CHECK: st.param.b32 [func_retval0+16], [[RE4]]; 836; CHECK: st.param.b64 [func_retval0+24], [[RE5]]; 837; CHECK: ret; 838 839define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { 840 %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a); 841 ret %s_i8i32x4 %r; 842} 843 844; -- All loads/stores from parameters aligned by one must be done one 845; -- byte at a time. 
; %s_i8i32x4p is the packed variant of the struct: 25 bytes with alignment 1,
; so its fields sit at offsets 0, 4, 8, 9, 13 and 17. The incoming parameter
; is expected to be read with 25 separate single-byte loads.
; CHECK:.visible .func  (.param .align 1 .b8 func_retval0[25])
; CHECK-LABEL: test_s_i1i32x4p(
; CHECK-DAG:        .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+24];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+23];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+22];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+21];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+20];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+19];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+18];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+17];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+16];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+15];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+14];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+13];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+12];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+11];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+10];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+9];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+8];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+7];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+6];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+5];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+4];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+3];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+2];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+1];
; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0];
; --- TODO
; --- Unaligned parameter store/ return value load is broken in both nvcc
; --- and llvm and needs to be fixed.
; The outgoing-argument stores and return-value accesses below therefore
; record the current full-width accesses at unaligned offsets. Registers are
; matched by class only, so unrelated changes in virtual register numbering
; do not break the test.
; CHECK:        .param .align 1 .b8 param0[25];
; CHECK-DAG:        st.param.b32    [param0+0],
; CHECK-DAG:        st.param.b32    [param0+4],
; CHECK-DAG:        st.param.b8     [param0+8],
; CHECK-DAG:        st.param.b32    [param0+9],
; CHECK-DAG:        st.param.b32    [param0+13],
; CHECK-DAG:        st.param.b64    [param0+17],
; CHECK:            .param .align 1 .b8 retval0[25];
; CHECK:            call.uni (retval0),
; CHECK-NEXT:       test_s_i1i32x4p,
; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+0];
; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+4];
; CHECK-DAG:        ld.param.b8     %rs{{[0-9]+}}, [retval0+8];
; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+9];
; CHECK-DAG:        ld.param.b32    %r{{[0-9]+}}, [retval0+13];
; CHECK-DAG:        ld.param.b64    %rd{{[0-9]+}}, [retval0+17];
; CHECK-DAG:        st.param.b32    [func_retval0+0],
; CHECK-DAG:        st.param.b32    [func_retval0+4],
; CHECK-DAG:        st.param.b8     [func_retval0+8],
; CHECK-DAG:        st.param.b32    [func_retval0+9],
; CHECK-DAG:        st.param.b32    [func_retval0+13],
; CHECK-DAG:        st.param.b64    [func_retval0+17],
; CHECK:            ret;

define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
       %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
       ret %s_i8i32x4p %r;
}

; Check that we can vectorize loads that span multiple aggregate fields.
; %s_crossfield is { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}] }.
; The expected accesses group its 32-bit elements as follows:
;   offset  0: v2  (the leading i32 plus the first array element)
;   offset  8: b32 (the second array element)
;   offset 16: v4  (the <4 x i32> field)
;   offset 32: v4  \
;   offset 48: v4   > the nine i32 values of the trailing array of structs
;   offset 64: b32 /
; CHECK:.visible .func  (.param .align 16 .b8 func_retval0[80])
; CHECK-LABEL: test_s_crossfield(
; CHECK:        .param .align 16 .b8 test_s_crossfield_param_0[80]
;
; Incoming parameter loads, expected from the highest offset down.
; CHECK:        ld.param.u32    [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
; CHECK:        ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
; CHECK:        ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
; CHECK:        ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
; CHECK:        ld.param.u32    [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
; CHECK:        ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
;
; Outgoing argument stores for the recursive call.
; CHECK:        .param .align 16 .b8 param0[80];
; CHECK:        st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
; CHECK:        st.param.b32    [param0+8], [[E2]];
; CHECK:        st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
; CHECK:        st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
; CHECK:        st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
; CHECK:        st.param.b32    [param0+64], [[E15]];
; CHECK:        .param .align 16 .b8 retval0[80];
; CHECK:        call.uni (retval0),
; CHECK:        test_s_crossfield,
;
; Return value of the call is read back and forwarded to our own return slot.
; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
; CHECK:        ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
; CHECK:        ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
; CHECK:        ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
; CHECK:        ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
; CHECK:        ld.param.b32    [[RE15:%r[0-9]+]], [retval0+64];
; CHECK:        st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
; CHECK:        st.param.b32    [func_retval0+8], [[RE2]];
; CHECK:        st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
; CHECK:        st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
; CHECK:        st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
; CHECK:        st.param.b32    [func_retval0+64], [[RE15]];
; CHECK:        ret;

define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
       %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
       ret %s_crossfield %r;
}