; RUN: opt -S -mtriple=amdgcn-- -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -codegenprepare < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Sinking the shift into both blocks actually makes this particular
; case worse in terms of code size.

; OPT-LABEL: @sink_ubfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

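; The s_bfe_u32 field descriptor packs (width << 16) | offset, so
; 0x70008 below extracts the 7-bit field at offset 8 (the lshr-by-8,
; and-by-127 arm) and 0x80008 extracts the 8-bit field at the same
; offset (the and-by-255 arm).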
; GCN-LABEL: {{^}}sink_ubfe_i32:
; GCN-NOT: lshr
; GCN: s_cbranch_scc{{[0-1]}}

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
; GCN: BB0_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008

; GCN: buffer_store_dword
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = lshr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_sbfe_i32(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = ashr i32 %arg1, 8
; OPT-NEXT: %val0 = and i32 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = ashr i32 %arg1, 8
; OPT-NEXT: %val1 = and i32 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_sbfe_i32:
define amdgpu_kernel void @sink_sbfe_i32(i32 addrspace(1)* %out, i32 %arg1) #0 {
entry:
  %shr = ashr i32 %arg1, 8
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i32 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i32 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i32 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i32 %phi, i32 addrspace(1)* %out
  ret void
}


; OPT-LABEL: @sink_ubfe_i16(
; OPT: entry:
; OPT-NEXT: br i1

; OPT: bb0:
; OPT: %0 = lshr i16 %arg1, 4
; OPT-NEXT: %val0 = and i16 %0, 255
; OPT: br label

; OPT: bb1:
; OPT: %1 = lshr i16 %arg1, 4
; OPT-NEXT: %val1 = and i16 %1, 127
; OPT: br label

; OPT: ret:
; OPT: store
; OPT: ret

; For GFX8: since i16 is a legal type, we cannot sink the lshr into the
; basic blocks.

; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
; VI: s_load_dword [[ARG:s[0-9]+]], s[0:1], 0x2c
; VI: s_bfe_u32 [[BFE:s[0-9]+]], [[ARG]], 0xc0004
; GCN: s_cbranch_scc{{[0-1]}}

; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f

; GCN: BB2_3:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff

; GCN: buffer_store_short
; GCN: s_endpgm
define amdgpu_kernel void @sink_ubfe_i16(i16 addrspace(1)* %out, i16 %arg1) #0 {
entry:
  %shr = lshr i16 %arg1, 4
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i16 %shr, 255
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i16 %shr, 127
  store volatile i16 0, i16 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i16 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i16 %phi, i16 addrspace(1)* %out
  ret void
}

; We don't really want to sink this one since it isn't reducible to a
; 32-bit BFE on one half of the integer.

; OPT-LABEL: @sink_ubfe_i64_span_midpoint(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 30
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 30
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: s_cbranch_scc{{[0-1]}} BB3_2
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_span_midpoint(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 30
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

; OPT-LABEL: @sink_ubfe_i64_low32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 15
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 15
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc{{[0-1]}} BB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_low32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 15
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

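; The field starts at bit 35, entirely within the high half of the
; i64, so the BFE is performed on the high register at offset
; 35 - 32 = 3 (0x70003 and 0x80003 below).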
; OPT-LABEL: @sink_ubfe_i64_high32(
; OPT: entry:
; OPT-NOT: lshr
; OPT: br i1

; OPT: bb0:
; OPT: %0 = lshr i64 %arg1, 35
; OPT-NEXT: %val0 = and i64 %0, 255

; OPT: bb1:
; OPT: %1 = lshr i64 %arg1, 35
; OPT-NEXT: %val1 = and i64 %1, 127

; OPT: ret:
; OPT: store
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc{{[0-1]}} BB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: buffer_store_dwordx2
define amdgpu_kernel void @sink_ubfe_i64_high32(i64 addrspace(1)* %out, i64 %arg1) #0 {
entry:
  %shr = lshr i64 %arg1, 35
  br i1 undef, label %bb0, label %bb1

bb0:
  %val0 = and i64 %shr, 255
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

bb1:
  %val1 = and i64 %shr, 127
  store volatile i32 0, i32 addrspace(1)* undef
  br label %ret

ret:
  %phi = phi i64 [ %val0, %bb0 ], [ %val1, %bb1 ]
  store i64 %phi, i64 addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }