# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-fast -verify-machineinstrs -o - %s | FileCheck %s
# XUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -regbankselect-greedy -verify-machineinstrs -o - %s | FileCheck %s

# Resource and offset both in SGPRs: the load can stay scalar, so the
# G_AMDGPU_S_BUFFER_LOAD is kept as-is with sgpr bank assignments.
---
name: buffer_load_ss
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4

    ; CHECK-LABEL: name: buffer_load_ss
    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4
    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr4
    ; CHECK: [[AMDGPU_S_BUFFER_LOAD:%[0-9]+]]:sgpr(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD [[COPY]](<4 x s32>), [[COPY1]](s32), 0
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = COPY $sgpr4
    %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0

...
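
# SGPR resource but VGPR offset: the scalar form is illegal, so regbankselect
# rewrites the load to a vgpr-bank G_AMDGPU_BUFFER_LOAD, using the VGPR offset
# directly and materializing zero constants for the remaining index/soffset
# operands.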
---
name: buffer_load_sv
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0

    ; CHECK-LABEL: name: buffer_load_sv
    ; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
    ; CHECK: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = COPY $vgpr0
    %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0

...

# VGPR resource with SGPR offset: the resource operand must be uniform, so
# regbankselect emits a waterfall loop that reads the descriptor back to SGPRs
# with V_READFIRSTLANE_B32 and retries over $exec until every active lane has
# issued its load.
---
name: buffer_load_vs
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0

    ; CHECK-LABEL: name: buffer_load_vs
    ; CHECK: successors: %bb.1(0x80000000)
    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr0
    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    ; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
    ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
    ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
    ; CHECK: .1:
    ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %9, %bb.1
    ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
    ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
    ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
    ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
    ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
    ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
    ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
    ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
    ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY2]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
    ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
    ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
    ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
    ; CHECK: .2:
    ; CHECK: successors: %bb.3(0x80000000)
    ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
    ; CHECK: .3:
    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    %1:_(s32) = COPY $sgpr0
    %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0

...
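
# Resource and offset both in VGPRs: the resource still needs the waterfall
# loop, while the VGPR offset can be used directly by the resulting
# G_AMDGPU_BUFFER_LOAD.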
---
name: buffer_load_vv
legalized: true
tracksRegLiveness: true
body: |
  bb.0:
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4

    ; CHECK-LABEL: name: buffer_load_vv
    ; CHECK: successors: %bb.1(0x80000000)
    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
    ; CHECK: [[COPY:%[0-9]+]]:vgpr(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr4
    ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
    ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
    ; CHECK: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
    ; CHECK: [[UV:%[0-9]+]]:vreg_64(s64), [[UV1:%[0-9]+]]:vreg_64(s64) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
    ; CHECK: [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term $exec
    ; CHECK: .1:
    ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000)
    ; CHECK: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[DEF]], %bb.0, %8, %bb.1
    ; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub0(s64), implicit $exec
    ; CHECK: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV]].sub1(s64), implicit $exec
    ; CHECK: [[MV:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32)
    ; CHECK: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV]](s64), [[UV]](s64), implicit $exec
    ; CHECK: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub0(s64), implicit $exec
    ; CHECK: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0(s32) = V_READFIRSTLANE_B32 [[UV1]].sub1(s64), implicit $exec
    ; CHECK: [[MV1:%[0-9]+]]:sreg_64_xexec(s64) = G_MERGE_VALUES [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
    ; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[MV1]](s64), [[UV1]](s64), implicit $exec
    ; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<4 x s32>) = G_BUILD_VECTOR [[V_READFIRSTLANE_B32_]](s32), [[V_READFIRSTLANE_B32_1]](s32), [[V_READFIRSTLANE_B32_2]](s32), [[V_READFIRSTLANE_B32_3]](s32)
    ; CHECK: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 16, align 4)
    ; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
    ; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
    ; CHECK: S_CBRANCH_EXECNZ %bb.1, implicit $exec
    ; CHECK: .2:
    ; CHECK: successors: %bb.3(0x80000000)
    ; CHECK: $exec = S_MOV_B64_term [[S_MOV_B64_term]]
    ; CHECK: .3:
    %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
    %1:_(s32) = COPY $vgpr4
    %2:_(<4 x s32>) = G_AMDGPU_S_BUFFER_LOAD %0, %1, 0

...