# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-fold-operands %s -o - | FileCheck -check-prefix=GCN %s

# Negative tests for clamp/omod folding in si-fold-operands: every function
# below contains an instruction that resembles a foldable clamp or omod
# pattern, and the checks require that it is left untouched.
#
# In the e64 instructions used here the last two immediates before the
# implicit operands are the clamp bit and the omod field, in that order.
# The float immediates are raw IEEE-754 bit patterns:
#   1065353216 = 0x3F800000 = 1.0
#   1056964608 = 0x3F000000 = 0.5

---
# A max of a value with itself is only a clamp when the clamp bit is set.
# Here clamp is 0, so the V_MAX must stay as a separate instruction.

# GCN-LABEL: name: v_max_self_clamp_not_set_f32
# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
# GCN-NEXT: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $mode, implicit $exec

name: v_max_self_clamp_not_set_f32
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_64_xexec }
  - { id: 6, class: sreg_32 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_64 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sgpr_64 }
  - { id: 13, class: sgpr_128 }
  - { id: 14, class: sreg_32_xm0 }
  - { id: 15, class: sreg_64 }
  - { id: 16, class: sgpr_128 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vreg_64 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vreg_64 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vreg_64 }
  - { id: 26, class: vreg_64 }
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $vgpr0

    %3 = COPY $vgpr0
    %0 = COPY $sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    %25 = REG_SEQUENCE %3, 1, %24, 2
    %10 = S_MOV_B32 61440
    %11 = S_MOV_B32 0
    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    %13 = REG_SEQUENCE killed %5, 17, %12, 18
    %14 = S_MOV_B32 2
    %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    %16 = REG_SEQUENCE killed %4, 17, %12, 18
    %18 = COPY %26
    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Don't fold a max that looks like a clamp if itself also has omod set

# GCN-LABEL: name: v_clamp_omod_already_set_f32
# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
# GCN-NEXT: %21:vgpr_32 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $mode, implicit $exec

name: v_clamp_omod_already_set_f32
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_64_xexec }
  - { id: 6, class: sreg_32 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_64 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sgpr_64 }
  - { id: 13, class: sgpr_128 }
  - { id: 14, class: sreg_32_xm0 }
  - { id: 15, class: sreg_64 }
  - { id: 16, class: sgpr_128 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vreg_64 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vreg_64 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vreg_64 }
  - { id: 26, class: vreg_64 }
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $vgpr0

    %3 = COPY $vgpr0
    %0 = COPY $sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    %25 = REG_SEQUENCE %3, 1, %24, 2
    %10 = S_MOV_B32 61440
    %11 = S_MOV_B32 0
    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    %13 = REG_SEQUENCE killed %5, 17, %12, 18
    %14 = S_MOV_B32 2
    %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    %16 = REG_SEQUENCE killed %4, 17, %12, 18
    %18 = COPY %26
    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Don't fold a mul that looks like an omod if itself has omod set

# GCN-LABEL: name: v_omod_mul_omod_already_set_f32
# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $mode, implicit $exec

name: v_omod_mul_omod_already_set_f32
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_64_xexec }
  - { id: 6, class: sreg_32 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_64 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sgpr_64 }
  - { id: 13, class: sgpr_128 }
  - { id: 14, class: sreg_32_xm0 }
  - { id: 15, class: sreg_64 }
  - { id: 16, class: sgpr_128 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vreg_64 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vreg_64 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vreg_64 }
  - { id: 26, class: vreg_64 }
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $vgpr0

    %3 = COPY $vgpr0
    %0 = COPY $sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    %25 = REG_SEQUENCE %3, 1, %24, 2
    %10 = S_MOV_B32 61440
    %11 = S_MOV_B32 0
    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    %13 = REG_SEQUENCE killed %5, 17, %12, 18
    %14 = S_MOV_B32 2
    %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    %16 = REG_SEQUENCE killed %4, 17, %12, 18
    %18 = COPY %26
    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Don't fold a mul that looks like an omod if itself has clamp set
# This might be OK, but would require folding the clamp at the same time.

# GCN-LABEL: name: v_omod_mul_clamp_already_set_f32
# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
# GCN-NEXT: %21:vgpr_32 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $mode, implicit $exec

name: v_omod_mul_clamp_already_set_f32
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_64_xexec }
  - { id: 6, class: sreg_32 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_64 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sgpr_64 }
  - { id: 13, class: sgpr_128 }
  - { id: 14, class: sreg_32_xm0 }
  - { id: 15, class: sreg_64 }
  - { id: 16, class: sgpr_128 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vreg_64 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vreg_64 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vreg_64 }
  - { id: 26, class: vreg_64 }
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $vgpr0

    %3 = COPY $vgpr0
    %0 = COPY $sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    %25 = REG_SEQUENCE %3, 1, %24, 2
    %10 = S_MOV_B32 61440
    %11 = S_MOV_B32 0
    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    %13 = REG_SEQUENCE killed %5, 17, %12, 18
    %14 = S_MOV_B32 2
    %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    %16 = REG_SEQUENCE killed %4, 17, %12, 18
    %18 = COPY %26
    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Don't fold an add (x + x) that looks like an omod if itself has omod set

# GCN-LABEL: name: v_omod_add_omod_already_set_f32
# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $mode, implicit $exec

name: v_omod_add_omod_already_set_f32
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_64_xexec }
  - { id: 6, class: sreg_32 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_64 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sgpr_64 }
  - { id: 13, class: sgpr_128 }
  - { id: 14, class: sreg_32_xm0 }
  - { id: 15, class: sreg_64 }
  - { id: 16, class: sgpr_128 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vreg_64 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vreg_64 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vreg_64 }
  - { id: 26, class: vreg_64 }
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $vgpr0

    %3 = COPY $vgpr0
    %0 = COPY $sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    %25 = REG_SEQUENCE %3, 1, %24, 2
    %10 = S_MOV_B32 61440
    %11 = S_MOV_B32 0
    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    %13 = REG_SEQUENCE killed %5, 17, %12, 18
    %14 = S_MOV_B32 2
    %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    %16 = REG_SEQUENCE killed %4, 17, %12, 18
    %18 = COPY %26
    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Don't fold an add (x + x) that looks like an omod if itself has clamp set
# This might be OK, but would require folding the clamp at the same time.

# GCN-LABEL: name: v_omod_add_clamp_already_set_f32
# GCN: %20:vgpr_32 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
# GCN-NEXT: %21:vgpr_32 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $mode, implicit $exec

name: v_omod_add_clamp_already_set_f32
tracksRegLiveness: true
registers:
  - { id: 0, class: sgpr_64 }
  - { id: 1, class: sreg_32_xm0 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64_xexec }
  - { id: 5, class: sreg_64_xexec }
  - { id: 6, class: sreg_32 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32_xm0 }
  - { id: 9, class: sreg_64 }
  - { id: 10, class: sreg_32_xm0 }
  - { id: 11, class: sreg_32_xm0 }
  - { id: 12, class: sgpr_64 }
  - { id: 13, class: sgpr_128 }
  - { id: 14, class: sreg_32_xm0 }
  - { id: 15, class: sreg_64 }
  - { id: 16, class: sgpr_128 }
  - { id: 17, class: vgpr_32 }
  - { id: 18, class: vreg_64 }
  - { id: 19, class: vgpr_32 }
  - { id: 20, class: vgpr_32 }
  - { id: 21, class: vgpr_32 }
  - { id: 22, class: vgpr_32 }
  - { id: 23, class: vreg_64 }
  - { id: 24, class: vgpr_32 }
  - { id: 25, class: vreg_64 }
  - { id: 26, class: vreg_64 }
liveins:
  - { reg: '$sgpr0_sgpr1', virtual-reg: '%0' }
  - { reg: '$vgpr0', virtual-reg: '%3' }
body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $vgpr0

    %3 = COPY $vgpr0
    %0 = COPY $sgpr0_sgpr1
    %4 = S_LOAD_DWORDX2_IMM %0, 9, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %5 = S_LOAD_DWORDX2_IMM %0, 11, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(4)* undef`)
    %24 = V_ASHRREV_I32_e32 31, %3, implicit $exec
    %25 = REG_SEQUENCE %3, 1, %24, 2
    %10 = S_MOV_B32 61440
    %11 = S_MOV_B32 0
    %12 = REG_SEQUENCE killed %11, 1, killed %10, 2
    %13 = REG_SEQUENCE killed %5, 17, %12, 18
    %14 = S_MOV_B32 2
    %26 = V_LSHL_B64 killed %25, 2, implicit $exec
    %16 = REG_SEQUENCE killed %4, 17, %12, 18
    %18 = COPY %26
    %17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $mode, implicit $exec
    %21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    S_ENDPGM 0

...
---
# Pass used to crash with immediate second operand of max
# Only the function label is checked; the point of the test is that the pass
# runs to completion on this input.

# GCN-LABEL: name: v_max_reg_imm_f32

name: v_max_reg_imm_f32
tracksRegLiveness: true
registers:
  - { id: 0, class: vgpr_32 }
  - { id: 1, class: vgpr_32 }
body: |
  bb.0:
    liveins: $vgpr0

    %0 = COPY $vgpr0
    %1 = V_MAX_F32_e64 0, killed %0, 0, 1056964608, 1, 0, implicit $mode, implicit $exec

...