; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s

; SI-LABEL: {{^}}s_movk_i32_k0:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k0(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 4295032831 ; ((1 << 16) - 1) | (1 << 32)
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295032831)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k1:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k1(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 4295000063 ; ((1 << 15) - 1) | (1 << 32)
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295000063)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k2:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x7fff{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 64, v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k2(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 274877939711 ; ((1 << 15) - 1) | (64 << 32)
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 274877939711)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k3:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k3(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 4295000064 ; (1 << 15) | (1 << 32)
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295000064)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k4:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0x20000{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 1, v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k4(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 4295098368 ; (1 << 17) | (1 << 32)
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 4295098368)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k5:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0xffef{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0xff00ffff{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k5(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 18374967954648334319 ; -17 & 0xff00ffffffffffff
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 18374967954648334319)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k6:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x41{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, 63, v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k6(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 270582939713 ; 65 | (63 << 32)
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 270582939713)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k7:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x2000{{$}}
; SI-DAG: s_movk_i32 [[HI_S_IMM:s[0-9]+]], 0x4000{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k7(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 70368744185856 ; ((1 << 13)) | ((1 << 14) << 32)
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 70368744185856)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k8:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8000{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k8(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 1229782942255906816 ; 0x11111111ffff8000
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255906816)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k9:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8001{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k9(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 1229782942255906817 ; 0x11111111ffff8001
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255906817)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k10:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8888{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k10(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 1229782942255909000 ; 0x11111111ffff8888
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255909000)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k11:
; SI-DAG: s_movk_i32 [[LO_S_IMM:s[0-9]+]], 0x8fff{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k11(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 1229782942255910911 ; 0x11111111ffff8fff
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255910911)
  ret void
}

; SI-LABEL: {{^}}s_movk_i32_k12:
; SI-DAG: s_mov_b32 [[LO_S_IMM:s[0-9]+]], 0xffff7001{{$}}
; SI-DAG: s_mov_b32 [[HI_S_IMM:s[0-9]+]], 0x11111111{{$}}
; SI-DAG: buffer_load_dwordx2 v{{\[}}[[LO_VREG:[0-9]+]]:[[HI_VREG:[0-9]+]]{{\]}},
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[LO_S_IMM]], v[[LO_VREG]]
; SI-DAG: v_or_b32_e32 {{v[0-9]+}}, [[HI_S_IMM]], v[[HI_VREG]]
; SI: s_endpgm
define amdgpu_kernel void @s_movk_i32_k12(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
  %loada = load i64, i64 addrspace(1)* %a, align 4
  %or = or i64 %loada, 1229782942255902721 ; 0x11111111ffff7001
  store i64 %or, i64 addrspace(1)* %out
  call void asm sideeffect "; use $0", "s"(i64 1229782942255902721)
  ret void
}