; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s

define amdgpu_ps float @atomic_swap_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_swap_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_swap_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_swap v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_add_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_sub_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_sub_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_sub_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_sub v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_smin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_smin_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_smin_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_smin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_umin_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_umin_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_umin_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_umin v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_smax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_smax_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_smax_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_smax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_umax_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_umax_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_umax_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_umax v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

; Renamed from @atomic_and_i321d for naming consistency with the other
; atomic_<op>_i32_1d tests in this file (missing underscore was a typo).
define amdgpu_ps float @atomic_and_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_and_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_and_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_and v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_or_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_or_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_or_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_or v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_xor_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_xor_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_xor_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_xor v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_inc_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_inc_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_inc_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_inc v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_dec_i32_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_dec_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_dec_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_dec v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_cmpswap_i32_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
; GFX9-LABEL: atomic_cmpswap_i32_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_cmpswap_i32_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_cmpswap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
; GFX9-LABEL: atomic_add_i32_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
; GFX9-LABEL: atomic_add_i32_3d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
; GFX9-LABEL: atomic_add_i32_cube:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_cube:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_CUBE unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
; GFX9-LABEL: atomic_add_i32_1darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_1darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    v_and_or_b32 v1, v1, 0xffff, v2
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
; GFX9-LABEL: atomic_add_i32_2darray:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2darray:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
; GFX9-LABEL: atomic_add_i32_2dmsaa:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    s_lshl_b32 s8, s0, 16
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2dmsaa:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_lshl_b32 s8, s0, 16
; GFX10-NEXT:    v_and_or_b32 v1, v1, v4, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v4, s8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: atomic_add_i32_2darraymsaa:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX9-NEXT:    v_and_or_b32 v1, v1, v5, v2
; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    v_and_or_b32 v2, v3, v5, v2
; GFX9-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 unorm glc a16 da
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_2darraymsaa:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v5, 0xffff
; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    v_and_or_b32 v1, v1, v5, v2
; GFX10-NEXT:    v_and_or_b32 v2, v3, v5, v4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v[1:2], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_i32_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
; GFX9-LABEL: atomic_add_i32_1d_slc:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 unorm glc slc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i32_1d_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v0, v1, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm glc slc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps <2 x float> @atomic_swap_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_swap_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_swap_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_swap v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

define amdgpu_ps <2 x float> @atomic_add_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_add_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_add_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

define amdgpu_ps <2 x float> @atomic_sub_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_sub_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_sub_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_sub v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

define amdgpu_ps <2 x float> @atomic_smin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_smin_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:    s_mov_b32 s4, s6
; GFX9-NEXT:    s_mov_b32 s5, s7
; GFX9-NEXT:    s_mov_b32 s6, s8
; GFX9-NEXT:    s_mov_b32 s7, s9
; GFX9-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: atomic_smin_i64_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    image_atomic_smin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i64 %v to <2 x float>
  ret <2 x float> %out
}

define amdgpu_ps <2 x float> @atomic_umin_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) {
; GFX9-LABEL: atomic_umin_i64_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s0, s2
; GFX9-NEXT:    s_mov_b32 s1, s3
; GFX9-NEXT:    s_mov_b32 s2, s4
; GFX9-NEXT:    s_mov_b32 s3, s5
; GFX9-NEXT:
s_mov_b32 s4, s6 923; GFX9-NEXT: s_mov_b32 s5, s7 924; GFX9-NEXT: s_mov_b32 s6, s8 925; GFX9-NEXT: s_mov_b32 s7, s9 926; GFX9-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 927; GFX9-NEXT: s_waitcnt vmcnt(0) 928; GFX9-NEXT: ; return to shader part epilog 929; 930; GFX10-LABEL: atomic_umin_i64_1d: 931; GFX10: ; %bb.0: ; %main_body 932; GFX10-NEXT: s_mov_b32 s0, s2 933; GFX10-NEXT: s_mov_b32 s1, s3 934; GFX10-NEXT: s_mov_b32 s2, s4 935; GFX10-NEXT: s_mov_b32 s3, s5 936; GFX10-NEXT: s_mov_b32 s4, s6 937; GFX10-NEXT: s_mov_b32 s5, s7 938; GFX10-NEXT: s_mov_b32 s6, s8 939; GFX10-NEXT: s_mov_b32 s7, s9 940; GFX10-NEXT: image_atomic_umin v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 941; GFX10-NEXT: s_waitcnt vmcnt(0) 942; GFX10-NEXT: ; return to shader part epilog 943main_body: 944 %v = call i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 945 %out = bitcast i64 %v to <2 x float> 946 ret <2 x float> %out 947} 948 949define amdgpu_ps <2 x float> @atomic_smax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 950; GFX9-LABEL: atomic_smax_i64_1d: 951; GFX9: ; %bb.0: ; %main_body 952; GFX9-NEXT: s_mov_b32 s0, s2 953; GFX9-NEXT: s_mov_b32 s1, s3 954; GFX9-NEXT: s_mov_b32 s2, s4 955; GFX9-NEXT: s_mov_b32 s3, s5 956; GFX9-NEXT: s_mov_b32 s4, s6 957; GFX9-NEXT: s_mov_b32 s5, s7 958; GFX9-NEXT: s_mov_b32 s6, s8 959; GFX9-NEXT: s_mov_b32 s7, s9 960; GFX9-NEXT: image_atomic_smax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 961; GFX9-NEXT: s_waitcnt vmcnt(0) 962; GFX9-NEXT: ; return to shader part epilog 963; 964; GFX10-LABEL: atomic_smax_i64_1d: 965; GFX10: ; %bb.0: ; %main_body 966; GFX10-NEXT: s_mov_b32 s0, s2 967; GFX10-NEXT: s_mov_b32 s1, s3 968; GFX10-NEXT: s_mov_b32 s2, s4 969; GFX10-NEXT: s_mov_b32 s3, s5 970; GFX10-NEXT: s_mov_b32 s4, s6 971; GFX10-NEXT: s_mov_b32 s5, s7 972; GFX10-NEXT: s_mov_b32 s6, s8 973; GFX10-NEXT: s_mov_b32 s7, s9 974; GFX10-NEXT: image_atomic_smax v[0:1], v2, s[0:7] 
dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 975; GFX10-NEXT: s_waitcnt vmcnt(0) 976; GFX10-NEXT: ; return to shader part epilog 977main_body: 978 %v = call i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 979 %out = bitcast i64 %v to <2 x float> 980 ret <2 x float> %out 981} 982 983define amdgpu_ps <2 x float> @atomic_umax_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 984; GFX9-LABEL: atomic_umax_i64_1d: 985; GFX9: ; %bb.0: ; %main_body 986; GFX9-NEXT: s_mov_b32 s0, s2 987; GFX9-NEXT: s_mov_b32 s1, s3 988; GFX9-NEXT: s_mov_b32 s2, s4 989; GFX9-NEXT: s_mov_b32 s3, s5 990; GFX9-NEXT: s_mov_b32 s4, s6 991; GFX9-NEXT: s_mov_b32 s5, s7 992; GFX9-NEXT: s_mov_b32 s6, s8 993; GFX9-NEXT: s_mov_b32 s7, s9 994; GFX9-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 995; GFX9-NEXT: s_waitcnt vmcnt(0) 996; GFX9-NEXT: ; return to shader part epilog 997; 998; GFX10-LABEL: atomic_umax_i64_1d: 999; GFX10: ; %bb.0: ; %main_body 1000; GFX10-NEXT: s_mov_b32 s0, s2 1001; GFX10-NEXT: s_mov_b32 s1, s3 1002; GFX10-NEXT: s_mov_b32 s2, s4 1003; GFX10-NEXT: s_mov_b32 s3, s5 1004; GFX10-NEXT: s_mov_b32 s4, s6 1005; GFX10-NEXT: s_mov_b32 s5, s7 1006; GFX10-NEXT: s_mov_b32 s6, s8 1007; GFX10-NEXT: s_mov_b32 s7, s9 1008; GFX10-NEXT: image_atomic_umax v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 1009; GFX10-NEXT: s_waitcnt vmcnt(0) 1010; GFX10-NEXT: ; return to shader part epilog 1011main_body: 1012 %v = call i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1013 %out = bitcast i64 %v to <2 x float> 1014 ret <2 x float> %out 1015} 1016 1017define amdgpu_ps <2 x float> @atomic_and_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 1018; GFX9-LABEL: atomic_and_i64_1d: 1019; GFX9: ; %bb.0: ; %main_body 1020; GFX9-NEXT: s_mov_b32 s0, s2 1021; GFX9-NEXT: s_mov_b32 s1, s3 1022; GFX9-NEXT: s_mov_b32 s2, s4 1023; GFX9-NEXT: s_mov_b32 s3, s5 1024; GFX9-NEXT: s_mov_b32 s4, s6 
1025; GFX9-NEXT: s_mov_b32 s5, s7 1026; GFX9-NEXT: s_mov_b32 s6, s8 1027; GFX9-NEXT: s_mov_b32 s7, s9 1028; GFX9-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 1029; GFX9-NEXT: s_waitcnt vmcnt(0) 1030; GFX9-NEXT: ; return to shader part epilog 1031; 1032; GFX10-LABEL: atomic_and_i64_1d: 1033; GFX10: ; %bb.0: ; %main_body 1034; GFX10-NEXT: s_mov_b32 s0, s2 1035; GFX10-NEXT: s_mov_b32 s1, s3 1036; GFX10-NEXT: s_mov_b32 s2, s4 1037; GFX10-NEXT: s_mov_b32 s3, s5 1038; GFX10-NEXT: s_mov_b32 s4, s6 1039; GFX10-NEXT: s_mov_b32 s5, s7 1040; GFX10-NEXT: s_mov_b32 s6, s8 1041; GFX10-NEXT: s_mov_b32 s7, s9 1042; GFX10-NEXT: image_atomic_and v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 1043; GFX10-NEXT: s_waitcnt vmcnt(0) 1044; GFX10-NEXT: ; return to shader part epilog 1045main_body: 1046 %v = call i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1047 %out = bitcast i64 %v to <2 x float> 1048 ret <2 x float> %out 1049} 1050 1051define amdgpu_ps <2 x float> @atomic_or_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 1052; GFX9-LABEL: atomic_or_i64_1d: 1053; GFX9: ; %bb.0: ; %main_body 1054; GFX9-NEXT: s_mov_b32 s0, s2 1055; GFX9-NEXT: s_mov_b32 s1, s3 1056; GFX9-NEXT: s_mov_b32 s2, s4 1057; GFX9-NEXT: s_mov_b32 s3, s5 1058; GFX9-NEXT: s_mov_b32 s4, s6 1059; GFX9-NEXT: s_mov_b32 s5, s7 1060; GFX9-NEXT: s_mov_b32 s6, s8 1061; GFX9-NEXT: s_mov_b32 s7, s9 1062; GFX9-NEXT: image_atomic_or v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 1063; GFX9-NEXT: s_waitcnt vmcnt(0) 1064; GFX9-NEXT: ; return to shader part epilog 1065; 1066; GFX10-LABEL: atomic_or_i64_1d: 1067; GFX10: ; %bb.0: ; %main_body 1068; GFX10-NEXT: s_mov_b32 s0, s2 1069; GFX10-NEXT: s_mov_b32 s1, s3 1070; GFX10-NEXT: s_mov_b32 s2, s4 1071; GFX10-NEXT: s_mov_b32 s3, s5 1072; GFX10-NEXT: s_mov_b32 s4, s6 1073; GFX10-NEXT: s_mov_b32 s5, s7 1074; GFX10-NEXT: s_mov_b32 s6, s8 1075; GFX10-NEXT: s_mov_b32 s7, s9 1076; GFX10-NEXT: image_atomic_or 
v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 1077; GFX10-NEXT: s_waitcnt vmcnt(0) 1078; GFX10-NEXT: ; return to shader part epilog 1079main_body: 1080 %v = call i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1081 %out = bitcast i64 %v to <2 x float> 1082 ret <2 x float> %out 1083} 1084 1085define amdgpu_ps <2 x float> @atomic_xor_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 1086; GFX9-LABEL: atomic_xor_i64_1d: 1087; GFX9: ; %bb.0: ; %main_body 1088; GFX9-NEXT: s_mov_b32 s0, s2 1089; GFX9-NEXT: s_mov_b32 s1, s3 1090; GFX9-NEXT: s_mov_b32 s2, s4 1091; GFX9-NEXT: s_mov_b32 s3, s5 1092; GFX9-NEXT: s_mov_b32 s4, s6 1093; GFX9-NEXT: s_mov_b32 s5, s7 1094; GFX9-NEXT: s_mov_b32 s6, s8 1095; GFX9-NEXT: s_mov_b32 s7, s9 1096; GFX9-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 1097; GFX9-NEXT: s_waitcnt vmcnt(0) 1098; GFX9-NEXT: ; return to shader part epilog 1099; 1100; GFX10-LABEL: atomic_xor_i64_1d: 1101; GFX10: ; %bb.0: ; %main_body 1102; GFX10-NEXT: s_mov_b32 s0, s2 1103; GFX10-NEXT: s_mov_b32 s1, s3 1104; GFX10-NEXT: s_mov_b32 s2, s4 1105; GFX10-NEXT: s_mov_b32 s3, s5 1106; GFX10-NEXT: s_mov_b32 s4, s6 1107; GFX10-NEXT: s_mov_b32 s5, s7 1108; GFX10-NEXT: s_mov_b32 s6, s8 1109; GFX10-NEXT: s_mov_b32 s7, s9 1110; GFX10-NEXT: image_atomic_xor v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 1111; GFX10-NEXT: s_waitcnt vmcnt(0) 1112; GFX10-NEXT: ; return to shader part epilog 1113main_body: 1114 %v = call i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1115 %out = bitcast i64 %v to <2 x float> 1116 ret <2 x float> %out 1117} 1118 1119define amdgpu_ps <2 x float> @atomic_inc_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 1120; GFX9-LABEL: atomic_inc_i64_1d: 1121; GFX9: ; %bb.0: ; %main_body 1122; GFX9-NEXT: s_mov_b32 s0, s2 1123; GFX9-NEXT: s_mov_b32 s1, s3 1124; GFX9-NEXT: s_mov_b32 s2, s4 1125; GFX9-NEXT: s_mov_b32 s3, s5 
1126; GFX9-NEXT: s_mov_b32 s4, s6 1127; GFX9-NEXT: s_mov_b32 s5, s7 1128; GFX9-NEXT: s_mov_b32 s6, s8 1129; GFX9-NEXT: s_mov_b32 s7, s9 1130; GFX9-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 1131; GFX9-NEXT: s_waitcnt vmcnt(0) 1132; GFX9-NEXT: ; return to shader part epilog 1133; 1134; GFX10-LABEL: atomic_inc_i64_1d: 1135; GFX10: ; %bb.0: ; %main_body 1136; GFX10-NEXT: s_mov_b32 s0, s2 1137; GFX10-NEXT: s_mov_b32 s1, s3 1138; GFX10-NEXT: s_mov_b32 s2, s4 1139; GFX10-NEXT: s_mov_b32 s3, s5 1140; GFX10-NEXT: s_mov_b32 s4, s6 1141; GFX10-NEXT: s_mov_b32 s5, s7 1142; GFX10-NEXT: s_mov_b32 s6, s8 1143; GFX10-NEXT: s_mov_b32 s7, s9 1144; GFX10-NEXT: image_atomic_inc v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 1145; GFX10-NEXT: s_waitcnt vmcnt(0) 1146; GFX10-NEXT: ; return to shader part epilog 1147main_body: 1148 %v = call i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1149 %out = bitcast i64 %v to <2 x float> 1150 ret <2 x float> %out 1151} 1152 1153define amdgpu_ps <2 x float> @atomic_dec_i64_1d(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 1154; GFX9-LABEL: atomic_dec_i64_1d: 1155; GFX9: ; %bb.0: ; %main_body 1156; GFX9-NEXT: s_mov_b32 s0, s2 1157; GFX9-NEXT: s_mov_b32 s1, s3 1158; GFX9-NEXT: s_mov_b32 s2, s4 1159; GFX9-NEXT: s_mov_b32 s3, s5 1160; GFX9-NEXT: s_mov_b32 s4, s6 1161; GFX9-NEXT: s_mov_b32 s5, s7 1162; GFX9-NEXT: s_mov_b32 s6, s8 1163; GFX9-NEXT: s_mov_b32 s7, s9 1164; GFX9-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 1165; GFX9-NEXT: s_waitcnt vmcnt(0) 1166; GFX9-NEXT: ; return to shader part epilog 1167; 1168; GFX10-LABEL: atomic_dec_i64_1d: 1169; GFX10: ; %bb.0: ; %main_body 1170; GFX10-NEXT: s_mov_b32 s0, s2 1171; GFX10-NEXT: s_mov_b32 s1, s3 1172; GFX10-NEXT: s_mov_b32 s2, s4 1173; GFX10-NEXT: s_mov_b32 s3, s5 1174; GFX10-NEXT: s_mov_b32 s4, s6 1175; GFX10-NEXT: s_mov_b32 s5, s7 1176; GFX10-NEXT: s_mov_b32 s6, s8 1177; GFX10-NEXT: s_mov_b32 s7, 
s9 1178; GFX10-NEXT: image_atomic_dec v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc a16 1179; GFX10-NEXT: s_waitcnt vmcnt(0) 1180; GFX10-NEXT: ; return to shader part epilog 1181main_body: 1182 %v = call i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1183 %out = bitcast i64 %v to <2 x float> 1184 ret <2 x float> %out 1185} 1186 1187define amdgpu_ps <2 x float> @atomic_cmpswap_i64_1d(<8 x i32> inreg %rsrc, i64 %cmp, i64 %swap, i16 %s) { 1188; GFX9-LABEL: atomic_cmpswap_i64_1d: 1189; GFX9: ; %bb.0: ; %main_body 1190; GFX9-NEXT: s_mov_b32 s0, s2 1191; GFX9-NEXT: s_mov_b32 s1, s3 1192; GFX9-NEXT: s_mov_b32 s2, s4 1193; GFX9-NEXT: s_mov_b32 s3, s5 1194; GFX9-NEXT: s_mov_b32 s4, s6 1195; GFX9-NEXT: s_mov_b32 s5, s7 1196; GFX9-NEXT: s_mov_b32 s6, s8 1197; GFX9-NEXT: s_mov_b32 s7, s9 1198; GFX9-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf unorm glc a16 1199; GFX9-NEXT: s_waitcnt vmcnt(0) 1200; GFX9-NEXT: ; return to shader part epilog 1201; 1202; GFX10-LABEL: atomic_cmpswap_i64_1d: 1203; GFX10: ; %bb.0: ; %main_body 1204; GFX10-NEXT: s_mov_b32 s0, s2 1205; GFX10-NEXT: s_mov_b32 s1, s3 1206; GFX10-NEXT: s_mov_b32 s2, s4 1207; GFX10-NEXT: s_mov_b32 s3, s5 1208; GFX10-NEXT: s_mov_b32 s4, s6 1209; GFX10-NEXT: s_mov_b32 s5, s7 1210; GFX10-NEXT: s_mov_b32 s6, s8 1211; GFX10-NEXT: s_mov_b32 s7, s9 1212; GFX10-NEXT: image_atomic_cmpswap v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm glc a16 1213; GFX10-NEXT: s_waitcnt vmcnt(0) 1214; GFX10-NEXT: ; return to shader part epilog 1215main_body: 1216 %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64 %cmp, i64 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0) 1217 %out = bitcast i64 %v to <2 x float> 1218 ret <2 x float> %out 1219} 1220 1221define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t) { 1222; GFX9-LABEL: atomic_add_i64_2d: 1223; GFX9: ; %bb.0: ; %main_body 1224; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff 
1225; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1226; GFX9-NEXT: s_mov_b32 s0, s2 1227; GFX9-NEXT: s_mov_b32 s1, s3 1228; GFX9-NEXT: s_mov_b32 s2, s4 1229; GFX9-NEXT: s_mov_b32 s3, s5 1230; GFX9-NEXT: s_mov_b32 s4, s6 1231; GFX9-NEXT: s_mov_b32 s5, s7 1232; GFX9-NEXT: s_mov_b32 s6, s8 1233; GFX9-NEXT: s_mov_b32 s7, s9 1234; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 1235; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 1236; GFX9-NEXT: s_waitcnt vmcnt(0) 1237; GFX9-NEXT: ; return to shader part epilog 1238; 1239; GFX10-LABEL: atomic_add_i64_2d: 1240; GFX10: ; %bb.0: ; %main_body 1241; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1242; GFX10-NEXT: s_mov_b32 s0, s2 1243; GFX10-NEXT: s_mov_b32 s1, s3 1244; GFX10-NEXT: s_mov_b32 s2, s4 1245; GFX10-NEXT: s_mov_b32 s3, s5 1246; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3 1247; GFX10-NEXT: s_mov_b32 s4, s6 1248; GFX10-NEXT: s_mov_b32 s5, s7 1249; GFX10-NEXT: s_mov_b32 s6, s8 1250; GFX10-NEXT: s_mov_b32 s7, s9 1251; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm glc a16 1252; GFX10-NEXT: s_waitcnt vmcnt(0) 1253; GFX10-NEXT: ; return to shader part epilog 1254main_body: 1255 %v = call i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0) 1256 %out = bitcast i64 %v to <2 x float> 1257 ret <2 x float> %out 1258} 1259 1260define amdgpu_ps <2 x float> @atomic_add_i64_3d(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %r) { 1261; GFX9-LABEL: atomic_add_i64_3d: 1262; GFX9: ; %bb.0: ; %main_body 1263; GFX9-NEXT: s_mov_b32 s0, s2 1264; GFX9-NEXT: s_mov_b32 s2, s4 1265; GFX9-NEXT: s_mov_b32 s4, s6 1266; GFX9-NEXT: s_mov_b32 s6, s8 1267; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff 1268; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1269; GFX9-NEXT: s_lshl_b32 s8, s0, 16 1270; GFX9-NEXT: s_mov_b32 s1, s3 1271; GFX9-NEXT: s_mov_b32 s3, s5 1272; GFX9-NEXT: s_mov_b32 s5, s7 1273; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 1274; GFX9-NEXT: s_mov_b32 s7, 
s9 1275; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 1276; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 1277; GFX9-NEXT: s_waitcnt vmcnt(0) 1278; GFX9-NEXT: ; return to shader part epilog 1279; 1280; GFX10-LABEL: atomic_add_i64_3d: 1281; GFX10: ; %bb.0: ; %main_body 1282; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff 1283; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1284; GFX10-NEXT: s_mov_b32 s0, s2 1285; GFX10-NEXT: s_mov_b32 s2, s4 1286; GFX10-NEXT: s_mov_b32 s4, s6 1287; GFX10-NEXT: s_mov_b32 s6, s8 1288; GFX10-NEXT: s_lshl_b32 s8, s0, 16 1289; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 1290; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 1291; GFX10-NEXT: s_mov_b32 s1, s3 1292; GFX10-NEXT: s_mov_b32 s3, s5 1293; GFX10-NEXT: s_mov_b32 s5, s7 1294; GFX10-NEXT: s_mov_b32 s7, s9 1295; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm glc a16 1296; GFX10-NEXT: s_waitcnt vmcnt(0) 1297; GFX10-NEXT: ; return to shader part epilog 1298main_body: 1299 %v = call i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0) 1300 %out = bitcast i64 %v to <2 x float> 1301 ret <2 x float> %out 1302} 1303 1304define amdgpu_ps <2 x float> @atomic_add_i64_cube(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %face) { 1305; GFX9-LABEL: atomic_add_i64_cube: 1306; GFX9: ; %bb.0: ; %main_body 1307; GFX9-NEXT: s_mov_b32 s0, s2 1308; GFX9-NEXT: s_mov_b32 s2, s4 1309; GFX9-NEXT: s_mov_b32 s4, s6 1310; GFX9-NEXT: s_mov_b32 s6, s8 1311; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff 1312; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1313; GFX9-NEXT: s_lshl_b32 s8, s0, 16 1314; GFX9-NEXT: s_mov_b32 s1, s3 1315; GFX9-NEXT: s_mov_b32 s3, s5 1316; GFX9-NEXT: s_mov_b32 s5, s7 1317; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 1318; GFX9-NEXT: s_mov_b32 s7, s9 1319; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 1320; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da 1321; GFX9-NEXT: s_waitcnt vmcnt(0) 
1322; GFX9-NEXT: ; return to shader part epilog 1323; 1324; GFX10-LABEL: atomic_add_i64_cube: 1325; GFX10: ; %bb.0: ; %main_body 1326; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff 1327; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1328; GFX10-NEXT: s_mov_b32 s0, s2 1329; GFX10-NEXT: s_mov_b32 s2, s4 1330; GFX10-NEXT: s_mov_b32 s4, s6 1331; GFX10-NEXT: s_mov_b32 s6, s8 1332; GFX10-NEXT: s_lshl_b32 s8, s0, 16 1333; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 1334; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 1335; GFX10-NEXT: s_mov_b32 s1, s3 1336; GFX10-NEXT: s_mov_b32 s3, s5 1337; GFX10-NEXT: s_mov_b32 s5, s7 1338; GFX10-NEXT: s_mov_b32 s7, s9 1339; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_CUBE unorm glc a16 1340; GFX10-NEXT: s_waitcnt vmcnt(0) 1341; GFX10-NEXT: ; return to shader part epilog 1342main_body: 1343 %v = call i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64 %data, i16 %s, i16 %t, i16 %face , <8 x i32> %rsrc, i32 0, i32 0) 1344 %out = bitcast i64 %v to <2 x float> 1345 ret <2 x float> %out 1346} 1347 1348define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %slice) { 1349; GFX9-LABEL: atomic_add_i64_1darray: 1350; GFX9: ; %bb.0: ; %main_body 1351; GFX9-NEXT: v_mov_b32_e32 v4, 0xffff 1352; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1353; GFX9-NEXT: s_mov_b32 s0, s2 1354; GFX9-NEXT: s_mov_b32 s1, s3 1355; GFX9-NEXT: s_mov_b32 s2, s4 1356; GFX9-NEXT: s_mov_b32 s3, s5 1357; GFX9-NEXT: s_mov_b32 s4, s6 1358; GFX9-NEXT: s_mov_b32 s5, s7 1359; GFX9-NEXT: s_mov_b32 s6, s8 1360; GFX9-NEXT: s_mov_b32 s7, s9 1361; GFX9-NEXT: v_and_or_b32 v2, v2, v4, v3 1362; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc a16 da 1363; GFX9-NEXT: s_waitcnt vmcnt(0) 1364; GFX9-NEXT: ; return to shader part epilog 1365; 1366; GFX10-LABEL: atomic_add_i64_1darray: 1367; GFX10: ; %bb.0: ; %main_body 1368; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1369; GFX10-NEXT: s_mov_b32 s0, s2 1370; GFX10-NEXT: s_mov_b32 
s1, s3 1371; GFX10-NEXT: s_mov_b32 s2, s4 1372; GFX10-NEXT: s_mov_b32 s3, s5 1373; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3 1374; GFX10-NEXT: s_mov_b32 s4, s6 1375; GFX10-NEXT: s_mov_b32 s5, s7 1376; GFX10-NEXT: s_mov_b32 s6, s8 1377; GFX10-NEXT: s_mov_b32 s7, s9 1378; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D_ARRAY unorm glc a16 1379; GFX10-NEXT: s_waitcnt vmcnt(0) 1380; GFX10-NEXT: ; return to shader part epilog 1381main_body: 1382 %v = call i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 1383 %out = bitcast i64 %v to <2 x float> 1384 ret <2 x float> %out 1385} 1386 1387define amdgpu_ps <2 x float> @atomic_add_i64_2darray(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice) { 1388; GFX9-LABEL: atomic_add_i64_2darray: 1389; GFX9: ; %bb.0: ; %main_body 1390; GFX9-NEXT: s_mov_b32 s0, s2 1391; GFX9-NEXT: s_mov_b32 s2, s4 1392; GFX9-NEXT: s_mov_b32 s4, s6 1393; GFX9-NEXT: s_mov_b32 s6, s8 1394; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff 1395; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1396; GFX9-NEXT: s_lshl_b32 s8, s0, 16 1397; GFX9-NEXT: s_mov_b32 s1, s3 1398; GFX9-NEXT: s_mov_b32 s3, s5 1399; GFX9-NEXT: s_mov_b32 s5, s7 1400; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 1401; GFX9-NEXT: s_mov_b32 s7, s9 1402; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 1403; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da 1404; GFX9-NEXT: s_waitcnt vmcnt(0) 1405; GFX9-NEXT: ; return to shader part epilog 1406; 1407; GFX10-LABEL: atomic_add_i64_2darray: 1408; GFX10: ; %bb.0: ; %main_body 1409; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff 1410; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1411; GFX10-NEXT: s_mov_b32 s0, s2 1412; GFX10-NEXT: s_mov_b32 s2, s4 1413; GFX10-NEXT: s_mov_b32 s4, s6 1414; GFX10-NEXT: s_mov_b32 s6, s8 1415; GFX10-NEXT: s_lshl_b32 s8, s0, 16 1416; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 1417; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 1418; GFX10-NEXT: 
s_mov_b32 s1, s3 1419; GFX10-NEXT: s_mov_b32 s3, s5 1420; GFX10-NEXT: s_mov_b32 s5, s7 1421; GFX10-NEXT: s_mov_b32 s7, s9 1422; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_ARRAY unorm glc a16 1423; GFX10-NEXT: s_waitcnt vmcnt(0) 1424; GFX10-NEXT: ; return to shader part epilog 1425main_body: 1426 %v = call i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0) 1427 %out = bitcast i64 %v to <2 x float> 1428 ret <2 x float> %out 1429} 1430 1431define amdgpu_ps <2 x float> @atomic_add_i64_2dmsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %fragid) { 1432; GFX9-LABEL: atomic_add_i64_2dmsaa: 1433; GFX9: ; %bb.0: ; %main_body 1434; GFX9-NEXT: s_mov_b32 s0, s2 1435; GFX9-NEXT: s_mov_b32 s2, s4 1436; GFX9-NEXT: s_mov_b32 s4, s6 1437; GFX9-NEXT: s_mov_b32 s6, s8 1438; GFX9-NEXT: v_mov_b32_e32 v5, 0xffff 1439; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1440; GFX9-NEXT: s_lshl_b32 s8, s0, 16 1441; GFX9-NEXT: s_mov_b32 s1, s3 1442; GFX9-NEXT: s_mov_b32 s3, s5 1443; GFX9-NEXT: s_mov_b32 s5, s7 1444; GFX9-NEXT: v_and_or_b32 v2, v2, v5, v3 1445; GFX9-NEXT: s_mov_b32 s7, s9 1446; GFX9-NEXT: v_and_or_b32 v3, v4, v5, s8 1447; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 1448; GFX9-NEXT: s_waitcnt vmcnt(0) 1449; GFX9-NEXT: ; return to shader part epilog 1450; 1451; GFX10-LABEL: atomic_add_i64_2dmsaa: 1452; GFX10: ; %bb.0: ; %main_body 1453; GFX10-NEXT: v_mov_b32_e32 v5, 0xffff 1454; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1455; GFX10-NEXT: s_mov_b32 s0, s2 1456; GFX10-NEXT: s_mov_b32 s2, s4 1457; GFX10-NEXT: s_mov_b32 s4, s6 1458; GFX10-NEXT: s_mov_b32 s6, s8 1459; GFX10-NEXT: s_lshl_b32 s8, s0, 16 1460; GFX10-NEXT: v_and_or_b32 v2, v2, v5, v3 1461; GFX10-NEXT: v_and_or_b32 v3, v4, v5, s8 1462; GFX10-NEXT: s_mov_b32 s1, s3 1463; GFX10-NEXT: s_mov_b32 s3, s5 1464; GFX10-NEXT: s_mov_b32 s5, s7 1465; GFX10-NEXT: s_mov_b32 s7, s9 1466; GFX10-NEXT: 
image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA unorm glc a16 1467; GFX10-NEXT: s_waitcnt vmcnt(0) 1468; GFX10-NEXT: ; return to shader part epilog 1469main_body: 1470 %v = call i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 1471 %out = bitcast i64 %v to <2 x float> 1472 ret <2 x float> %out 1473} 1474 1475define amdgpu_ps <2 x float> @atomic_add_i64_2darraymsaa(<8 x i32> inreg %rsrc, i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) { 1476; GFX9-LABEL: atomic_add_i64_2darraymsaa: 1477; GFX9: ; %bb.0: ; %main_body 1478; GFX9-NEXT: v_mov_b32_e32 v6, 0xffff 1479; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1480; GFX9-NEXT: v_and_or_b32 v2, v2, v6, v3 1481; GFX9-NEXT: v_lshlrev_b32_e32 v3, 16, v5 1482; GFX9-NEXT: s_mov_b32 s0, s2 1483; GFX9-NEXT: s_mov_b32 s1, s3 1484; GFX9-NEXT: s_mov_b32 s2, s4 1485; GFX9-NEXT: s_mov_b32 s3, s5 1486; GFX9-NEXT: s_mov_b32 s4, s6 1487; GFX9-NEXT: s_mov_b32 s5, s7 1488; GFX9-NEXT: s_mov_b32 s6, s8 1489; GFX9-NEXT: s_mov_b32 s7, s9 1490; GFX9-NEXT: v_and_or_b32 v3, v4, v6, v3 1491; GFX9-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 unorm glc a16 da 1492; GFX9-NEXT: s_waitcnt vmcnt(0) 1493; GFX9-NEXT: ; return to shader part epilog 1494; 1495; GFX10-LABEL: atomic_add_i64_2darraymsaa: 1496; GFX10: ; %bb.0: ; %main_body 1497; GFX10-NEXT: v_mov_b32_e32 v6, 0xffff 1498; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1499; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1500; GFX10-NEXT: s_mov_b32 s0, s2 1501; GFX10-NEXT: s_mov_b32 s1, s3 1502; GFX10-NEXT: s_mov_b32 s2, s4 1503; GFX10-NEXT: v_and_or_b32 v2, v2, v6, v3 1504; GFX10-NEXT: v_and_or_b32 v3, v4, v6, v5 1505; GFX10-NEXT: s_mov_b32 s3, s5 1506; GFX10-NEXT: s_mov_b32 s4, s6 1507; GFX10-NEXT: s_mov_b32 s5, s7 1508; GFX10-NEXT: s_mov_b32 s6, s8 1509; GFX10-NEXT: s_mov_b32 s7, s9 1510; GFX10-NEXT: image_atomic_add v[0:1], v[2:3], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm glc a16 1511; 
GFX10-NEXT: s_waitcnt vmcnt(0) 1512; GFX10-NEXT: ; return to shader part epilog 1513main_body: 1514 %v = call i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0) 1515 %out = bitcast i64 %v to <2 x float> 1516 ret <2 x float> %out 1517} 1518 1519define amdgpu_ps <2 x float> @atomic_add_i64_1d_slc(<8 x i32> inreg %rsrc, i64 %data, i16 %s) { 1520; GFX9-LABEL: atomic_add_i64_1d_slc: 1521; GFX9: ; %bb.0: ; %main_body 1522; GFX9-NEXT: s_mov_b32 s0, s2 1523; GFX9-NEXT: s_mov_b32 s1, s3 1524; GFX9-NEXT: s_mov_b32 s2, s4 1525; GFX9-NEXT: s_mov_b32 s3, s5 1526; GFX9-NEXT: s_mov_b32 s4, s6 1527; GFX9-NEXT: s_mov_b32 s5, s7 1528; GFX9-NEXT: s_mov_b32 s6, s8 1529; GFX9-NEXT: s_mov_b32 s7, s9 1530; GFX9-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 unorm glc slc a16 1531; GFX9-NEXT: s_waitcnt vmcnt(0) 1532; GFX9-NEXT: ; return to shader part epilog 1533; 1534; GFX10-LABEL: atomic_add_i64_1d_slc: 1535; GFX10: ; %bb.0: ; %main_body 1536; GFX10-NEXT: s_mov_b32 s0, s2 1537; GFX10-NEXT: s_mov_b32 s1, s3 1538; GFX10-NEXT: s_mov_b32 s2, s4 1539; GFX10-NEXT: s_mov_b32 s3, s5 1540; GFX10-NEXT: s_mov_b32 s4, s6 1541; GFX10-NEXT: s_mov_b32 s5, s7 1542; GFX10-NEXT: s_mov_b32 s6, s8 1543; GFX10-NEXT: s_mov_b32 s7, s9 1544; GFX10-NEXT: image_atomic_add v[0:1], v2, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm glc slc a16 1545; GFX10-NEXT: s_waitcnt vmcnt(0) 1546; GFX10-NEXT: ; return to shader part epilog 1547main_body: 1548 %v = call i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2) 1549 %out = bitcast i64 %v to <2 x float> 1550 ret <2 x float> %out 1551} 1552 1553declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1554declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1555declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 
immarg) #0 1556declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1557declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1558declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1559declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1560declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1561declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1562declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1563declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1564declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1565declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1566declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1567declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1568declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1569declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1570declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1571declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1572declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1573 1574declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 
immarg) #0 1575declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1576declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1577declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1578declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1579declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1580declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1581declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1582declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1583declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1584declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1585declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1586declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1587declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1588declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1589declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1590declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1591declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1592declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1593declare i64 
@llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0 1594 1595attributes #0 = { nounwind } 1596