; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s --check-prefix=SI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=GCN --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC

; Checks the instructions used to read kernel arguments of scalar and vector
; types on the amdgcn (SI, tonga) and r600 (redwood, cayman) targets.
; Every kernel below only stores its %in / %a argument (extended to i32 for
; the sub-dword scalar cases) through %out, so the checks focus on the
; argument load and, where one is given, on its offset operand.

;------------------------------------------------------------------------------
; i8 scalar arguments - plain, zeroext and signext
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}i8_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; GCN: buffer_load_ubyte

define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i8_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i8_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
  %0 = sext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; i16 scalar arguments - plain, zeroext and signext
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}i16_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; GCN: buffer_load_ushort

define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i16_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i16_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c

define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
  %0 = sext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; 32-bit scalar arguments
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}i32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}f32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; 2-element vector arguments
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}v2i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v2i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN-DAG: buffer_load_ushort
; GCN-DAG: buffer_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v2i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v2f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; 3-element vector arguments
;------------------------------------------------------------------------------

; NOTE(review) - the three VTX_READ_8 lines below carry no FileCheck prefix, so
; they are ordinary comments and no run line verifies them. They are kept
; disabled because their offsets have not been confirmed against llc output.
; FUNC-LABEL: {{^}}v3i8_arg:
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; NOTE(review) - as for v3i8_arg, the VTX_READ_16 lines below have no
; FileCheck prefix and are therefore not checked; offsets are unconfirmed.
; FUNC-LABEL: {{^}}v3i16_arg:
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v3i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v3f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; 4-element vector arguments
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}v4i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v4i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v4i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v4f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; 8-element vector arguments
;------------------------------------------------------------------------------

; One byte load is expected per element, matching v4i8_arg and v16i8_arg.
; FUNC-LABEL: {{^}}v8i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v8i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v8i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; The tonga check mirrors the one on v8i32_arg, whose argument has the same
; size and position.
; FUNC-LABEL: {{^}}v8f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x11
; VI: s_load_dwordx8 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x44
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; 16-element vector arguments
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}v16i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v16i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v16i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v16f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x19
; VI: s_load_dwordx16 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x64
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}

;------------------------------------------------------------------------------
; 64-bit arguments
;------------------------------------------------------------------------------

; FUNC-LABEL: {{^}}kernel_arg_i64:
; GCN: s_load_dwordx2
; GCN: s_load_dwordx2
; GCN: buffer_store_dwordx2
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}

; The <1 x i64> variant below is disabled - both the function and its checks
; are commented out (the X-prefixed directives match no enabled prefix).
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XGCN: s_load_dwordx2
; XGCN: s_load_dwordx2
; XGCN: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }