# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s

# Each function below materializes a 32-bit constant with V_MOV_B32 and uses
# it as an operand of f16 and/or f32 VALU adds. The FileCheck lines pin
# whether si-fold-operands followed by si-shrink-instructions folds the
# constant into each user or leaves the V_MOV_B32 in place.
--- |
  define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f16.add1 = fadd half %f16.val1, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f16.add1, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH0001
    %f16.add1 = fadd half %f16.val1, 0xH0001
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f16.add1, half addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xHFFFE
    %f16.add1 = fadd half %f16.val1, 0xHFFFE
    %f32.add = fadd float %f32.val, 0xffffffffc0000000
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f16.add1, half addrspace(1)* undef
    store volatile float %f32.add, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
    %f32.val0 = load volatile float, float addrspace(1)* undef
    %f32.val1 = load volatile float, float addrspace(1)* undef
    %f32.val = load volatile float, float addrspace(1)* undef
    %f32.add0 = fadd float %f32.val0, 1.0
    %f32.add1 = fadd float %f32.val1, 1.0
    store volatile float %f32.add0, float addrspace(1)* undef
    store volatile float %f32.add1, float addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile half, half addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd half %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f32.add, half addrspace(1)* undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
    %f16.val0 = load volatile half, half addrspace(1)* undef
    %f16.val1 = load volatile half, half addrspace(1)* undef
    %f32.val = load volatile half, half addrspace(1)* undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd half %f32.val, 1.000000e+00
    store volatile half %f16.add0, half addrspace(1)* undef
    store volatile half %f32.add, half addrspace(1)* undef
    ret void
  }

  attributes #0 = { nounwind }

...
---

# f32 1.0 (1065353216 = 0x3F800000) with a single f16 use should be
# folded into that use as the low 32-bits of a literal constant.

# CHECK-LABEL: name: add_f32_1.0_one_f16_use
# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $exec

name:            add_f32_1.0_one_f16_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = V_MOV_B32_e32 1065353216, implicit $exec
    %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---
# Materialized f32 inline immediate should not be folded into the f16
# operands: with two f16 users the V_MOV_B32 is expected to stay and both
# adds keep reading the register.

# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 killed %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 killed %12, killed %13, implicit $exec


name:            add_f32_1.0_multi_f16_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 1065353216, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---

# f32 1.0 should be folded into the single f32 use as an inline
# immediate, and folded into the single f16 use as a literal constant

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec

name:            add_f32_1.0_one_f32_use_one_f16_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---

# f32 1.0 should be folded for the single f32 use as an inline
# constant, and not folded as a multi-use literal for the f16 cases

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 1065353216, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %11, %14, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 %12, %14, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $exec

name:            add_f32_1.0_one_f32_use_multi_f16_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---
# The integer constant 1 is expected to be folded into both f16 uses.
# The V_MOV_B32 that materialized it is still expected in the output,
# since these passes fold the operand but do not delete the dead move.

# CHECK-LABEL: name: add_i32_1_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $exec


name:            add_i32_1_multi_f16_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 1, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---

# The integer constant -2 is expected to be folded into every use: both
# f16 adds and the f32 add. The V_MOV_B32 that materialized it is still
# expected in the output.

# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
# CHECK: %14:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $exec

name:            add_i32_m2_one_f32_use_multi_f16_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %14 = V_MOV_B32_e32 -2, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---

# The bit pattern of f16 1.0 (15360 = 0x3C00) used by multiple f32
# instructions should not be folded as a multi-use literal; the
# V_MOV_B32 stays and both f32 adds keep reading the register.

# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 15360, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F32_e32 %12, %13, implicit $exec

name:            add_f16_1.0_multi_f32_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 15360, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    S_ENDPGM

...
---

# The low 16-bits are an inline immediate (80886784 = 0x04D23C00, whose
# low half 0x3C00 is f16 1.0), but the high bits are junk.
# FIXME: Should be able to fold this

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 80886784, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec

name:            add_f16_1.0_other_high_bits_multi_f16_use
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 80886784, implicit $exec
    %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...
---

# Same shape of constant (305413120 = 0x12343C00, low half 0x3C00 is
# f16 1.0) with one f32 use and one f16 use. Nothing is folded today.
# FIXME: Should fold inline immediate into f16 and literal use into
# f32 instruction.

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
# CHECK: %13:vgpr_32 = V_MOV_B32_e32 305413120, implicit $exec
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 %11, %13, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 %12, %13, implicit $exec
name:            add_f16_1.0_other_high_bits_use_f16_f32
alignment:       0
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sreg_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
    %13 = V_MOV_B32_e32 305413120, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
    S_ENDPGM

...