; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10NSA %s

define amdgpu_ps float @atomic_swap_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_swap_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_swap_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.swap.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_add_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_sub_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_sub_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_sub_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.sub.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_smin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_smin_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_smin_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}


define amdgpu_ps float @atomic_umin_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_umin_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_umin_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umin.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_smax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_smax_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_smax_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.smax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_umax_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_umax_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_umax_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.umax.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_and_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_and_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_and_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.and.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_or_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_or_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_or_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.or.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_xor_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_xor_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_xor_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.xor.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_inc_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_inc_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_inc_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.inc.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_dec_1d(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_dec_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_dec_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.dec.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_cmpswap_1d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s) {
  ; GFX9-LABEL: name: atomic_cmpswap_1d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_cmpswap_1d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.1d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32 %cmp, i32 %swap, i16 %s, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_2d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t) {
  ; GFX9-LABEL: name: atomic_add_2d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_2d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2d), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32 %data, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_3d(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %r) {
  ; GFX9-LABEL: name: atomic_add_3d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_3d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.3d), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32 %data, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_cube(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %face) {
  ; GFX9-LABEL: name: atomic_add_cube
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_cube
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.cube), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32 %data, i16 %s, i16 %t, i16 %face, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_1darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %slice) {
  ; GFX9-LABEL: name: atomic_add_1darray
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_1darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY11]](s32), [[COPY12]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1darray), [[COPY8]](s32), [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32 %data, i16 %s, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_2darray(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice) {
  ; GFX9-LABEL: name: atomic_add_2darray
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_2darray
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darray), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
  ; GFX9-LABEL: name: atomic_add_2dmsaa
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
define amdgpu_ps float @atomic_add_2dmsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %fragid) {
  ; GFX9-LABEL: name: atomic_add_2dmsaa
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_2dmsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2dmsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_add_2darraymsaa(<8 x i32> inreg %rsrc, i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
  ; GFX9-LABEL: name: atomic_add_2darraymsaa
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_2darraymsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY9]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[COPY16]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.2darraymsaa), [[COPY8]](s32), [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32 %data, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

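; Same 1D add, but with the slc cache-policy argument (i32 2); the legalized intrinsic carries trailing immediates 0, 2, 3 instead of the 0, 0, 3 seen in the other tests.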
define amdgpu_ps float @atomic_add_1d_slc(<8 x i32> inreg %rsrc, i32 %data, i16 %s) {
  ; GFX9-LABEL: name: atomic_add_1d_slc
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_add_1d_slc
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY9]](s32)
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.add.1d), [[COPY8]](s32), [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 0, 2, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32 %data, i16 %s, <8 x i32> %rsrc, i32 0, i32 2)
  %out = bitcast i32 %v to float
  ret float %out
}

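; For cmpswap the %cmp/%swap values are first assembled into a <2 x s32> data operand, which precedes the packed 16-bit coordinates on the legalized intrinsic.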
define amdgpu_ps float @atomic_cmpswap_2d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t) {
  ; GFX9-LABEL: name: atomic_cmpswap_2d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_cmpswap_2d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

define amdgpu_ps float @atomic_cmpswap_3d(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r) {
  ; GFX9-LABEL: name: atomic_cmpswap_3d
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX9: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_cmpswap_3d
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY13]](s32), [[COPY14]](s32)
  ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10NSA: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY15]](s32), [[DEF]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.3d), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

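; With four 16-bit coordinates (s, t, slice, fragid) the <4 x s16> coordinate vector is filled exactly, so no implicit_def padding is needed.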
define amdgpu_ps float @atomic_cmpswap_2darraymsaa(<8 x i32> inreg %rsrc, i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
  ; GFX9-LABEL: name: atomic_cmpswap_2darraymsaa
  ; GFX9: bb.1.main_body:
  ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX9: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX9: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX9: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX9: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX9: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX9: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX9: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX9: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX9: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX9: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX9: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX9: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX9: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX9: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX9: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX9: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
  ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX9: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX9: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0
  ; GFX10NSA-LABEL: name: atomic_cmpswap_2darraymsaa
  ; GFX10NSA: bb.1.main_body:
  ; GFX10NSA: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
  ; GFX10NSA: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; GFX10NSA: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
  ; GFX10NSA: [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
  ; GFX10NSA: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
  ; GFX10NSA: [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
  ; GFX10NSA: [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
  ; GFX10NSA: [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
  ; GFX10NSA: [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
  ; GFX10NSA: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; GFX10NSA: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; GFX10NSA: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
  ; GFX10NSA: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
  ; GFX10NSA: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
  ; GFX10NSA: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
  ; GFX10NSA: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
  ; GFX10NSA: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32)
  ; GFX10NSA: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY10]](s32)
  ; GFX10NSA: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY11]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[COPY15]](s32)
  ; GFX10NSA: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY12]](s32)
  ; GFX10NSA: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY13]](s32)
  ; GFX10NSA: [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY16]](s32), [[COPY17]](s32)
  ; GFX10NSA: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
  ; GFX10NSA: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(s32) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.cmpswap.2darraymsaa), [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (volatile dereferenceable load store 4 on custom "TargetCustom8")
  ; GFX10NSA: $vgpr0 = COPY [[AMDGPU_INTRIN_IMAGE_LOAD]](s32)
  ; GFX10NSA: SI_RETURN_TO_EPILOG implicit $vgpr0
main_body:
  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32 %cmp, i32 %swap, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  %out = bitcast i32 %v to float
  ret float %out
}

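; Declarations for the a16 (i16-coordinate) image atomic intrinsics used in this file.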
declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind }