; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=R600,FUNC %s

; BFI_INT Definition pattern from ISA docs
; (y & x) | (z & ~x)
;
; FUNC-LABEL: {{^}}bfi_def:
; R600: BFI_INT

; GCN-DAG: s_andn2_b32
; GCN-DAG: s_and_b32
; GCN: s_or_b32
define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
  %0 = xor i32 %x, -1
  %1 = and i32 %z, %0
  %2 = and i32 %y, %x
  %3 = or i32 %1, %2
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; SHA-256 Ch function
; z ^ (x & (y ^ z))
; FUNC-LABEL: {{^}}bfi_sha256_ch:
; R600: BFI_INT

; GCN: s_xor_b32
; GCN: s_and_b32
; GCN: s_xor_b32
define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
  %0 = xor i32 %y, %z
  %1 = and i32 %x, %0
  %2 = xor i32 %z, %1
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
; FUNC-LABEL: {{^}}bfi_sha256_ma:
; R600: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
; R600: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W

; GCN: s_and_b32
; GCN: s_or_b32
; GCN: s_and_b32
; GCN: s_or_b32
define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
  %0 = and i32 %x, %z
  %1 = or i32 %x, %z
  %2 = and i32 %y, %1
  %3 = or i32 %0, %2
  store i32 %3, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_bitselect_v2i32_pat1:
; GCN: s_waitcnt
; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
  %xor.0 = xor <2 x i32> %a, %mask
  %and = and <2 x i32> %xor.0, %b
  %bitselect = xor <2 x i32> %and, %mask
  ret <2 x i32> %bitselect
}

; FUNC-LABEL: {{^}}v_bitselect_i64_pat_0:
; GCN: s_waitcnt
; GCN-NEXT: v_bfi_b32 v1, v1, v3, v5
; GCN-NEXT: v_bfi_b32 v0, v0, v2, v4
; GCN-NEXT: s_setpc_b64
define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  ret i64 %bitselect
}

; FUNC-LABEL: {{^}}v_bitselect_i64_pat_1:
; GCN: s_waitcnt
; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
; GCN-NEXT: s_setpc_b64
define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask
  ret i64 %bitselect
}

; FUNC-LABEL: {{^}}v_bitselect_i64_pat_2:
; GCN: s_waitcnt
; GCN-DAG: v_bfi_b32 v0, v2, v0, v4
; GCN-DAG: v_bfi_b32 v1, v3, v1, v5
; GCN-NEXT: s_setpc_b64
define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask
  ret i64 %bitselect
}

; FUNC-LABEL: {{^}}v_bfi_sha256_ma_i64:
; GCN-DAG: v_xor_b32_e32 v1, v1, v3
; GCN-DAG: v_xor_b32_e32 v0, v0, v2
; GCN-DAG: v_bfi_b32 v1, v1, v5, v3
; GCN-DAG: v_bfi_b32 v0, v0, v4, v2
define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
entry:
  %and0 = and i64 %x, %z
  %or0 = or i64 %x, %z
  %and1 = and i64 %y, %or0
  %or1 = or i64 %and0, %and1
  ret i64 %or1
}
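
; The Ma pattern above is the majority function MAJ(x, y, z): where x and y
; agree the result is their shared bit, otherwise it is the bit of z. Since
; BFI_INT computes (y & x) | (z & ~x) per the ISA definition at the top of
; this file, MAJ(x, y, z) == bfi(x ^ y, z, y), which is why the checks above
; expect a v_xor_b32 feeding a v_bfi_b32 for each 32-bit half. A minimal
; sketch of that select form in IR (hypothetical helper, not exercised by the
; RUN/CHECK lines in this file):
define i32 @bfi_sha256_ma_select_form_sketch(i32 %x, i32 %y, i32 %z) {
entry:
  %mask = xor i32 %x, %y         ; bits where x and y disagree
  %sel.z = and i32 %z, %mask     ; take z where they disagree
  %not.mask = xor i32 %mask, -1
  %sel.y = and i32 %y, %not.mask ; take y (== x) where they agree
  %maj = or i32 %sel.z, %sel.y   ; == (x & z) | (y & (x | z))
  ret i32 %maj
}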

; FIXME: Should leave as 64-bit SALU ops
; FUNC-LABEL: {{^}}s_bitselect_i64_pat_0:
; GCN: s_and_b64
; GCN: s_andn2_b64
; GCN: s_or_b64
define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
  %and0 = and i64 %a, %b
  %not.a = xor i64 %a, -1
  %and1 = and i64 %not.a, %mask
  %bitselect = or i64 %and0, %and1
  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}

; FUNC-LABEL: {{^}}s_bitselect_i64_pat_1:
; GCN: s_xor_b64
; GCN: s_and_b64
; GCN: s_xor_b64
define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask

  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}

; FUNC-LABEL: {{^}}s_bitselect_i64_pat_2:
; GCN: s_xor_b64
; GCN: s_and_b64
; GCN: s_xor_b64
define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
  %xor.0 = xor i64 %a, %mask
  %and = and i64 %xor.0, %b
  %bitselect = xor i64 %and, %mask

  %scalar.use = add i64 %bitselect, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}

; FUNC-LABEL: {{^}}s_bfi_sha256_ma_i64:
; GCN: s_and_b64
; GCN: s_or_b64
; GCN: s_and_b64
; GCN: s_or_b64
define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
entry:
  %and0 = and i64 %x, %z
  %or0 = or i64 %x, %z
  %and1 = and i64 %y, %or0
  %or1 = or i64 %and0, %and1

  %scalar.use = add i64 %or1, 10
  store i64 %scalar.use, i64 addrspace(1)* undef
  ret void
}
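
; On the VALU side an i64 bitselect splits into two independent 32-bit BFIs
; (see v_bitselect_i64_pat_0 above), while the FIXME notes the scalar path
; should instead stay as 64-bit SALU ops. A minimal sketch of that 32-bit
; split written out explicitly in IR (hypothetical helper, not exercised by
; the RUN/CHECK lines in this file):
define i64 @v_bitselect_i64_split_sketch(i64 %a, i64 %b, i64 %mask) {
  ; low half: (a.lo & b.lo) | (~a.lo & mask.lo) == bfi(a.lo, b.lo, mask.lo)
  %a.lo = trunc i64 %a to i32
  %b.lo = trunc i64 %b to i32
  %m.lo = trunc i64 %mask to i32
  %and0.lo = and i32 %a.lo, %b.lo
  %nota.lo = xor i32 %a.lo, -1
  %and1.lo = and i32 %nota.lo, %m.lo
  %sel.lo = or i32 %and0.lo, %and1.lo
  ; high half: the same pattern on bits 32..63
  %a.hi64 = lshr i64 %a, 32
  %b.hi64 = lshr i64 %b, 32
  %m.hi64 = lshr i64 %mask, 32
  %a.hi = trunc i64 %a.hi64 to i32
  %b.hi = trunc i64 %b.hi64 to i32
  %m.hi = trunc i64 %m.hi64 to i32
  %and0.hi = and i32 %a.hi, %b.hi
  %nota.hi = xor i32 %a.hi, -1
  %and1.hi = and i32 %nota.hi, %m.hi
  %sel.hi = or i32 %and0.hi, %and1.hi
  ; recombine the two halves into the i64 result
  %lo.ext = zext i32 %sel.lo to i64
  %hi.ext = zext i32 %sel.hi to i64
  %hi.shl = shl i64 %hi.ext, 32
  %res = or i64 %lo.ext, %hi.shl
  ret i64 %res
}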