; Test vector insertion of byte-swapped memory values.
;
; Every function below loads a scalar from memory, byte-swaps it and inserts
; it into a vector argument; the FileCheck directives pin the instruction that
; llc is expected to emit for that sequence.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; v8i16: byte-swapped halfword inserted into element 0 (the first element).
define <8 x i16> @f1(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f1:
; CHECK: vlebrh %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = load i16, i16* %ptr
  %rev = call i16 @llvm.bswap.i16(i16 %loaded)
  %result = insertelement <8 x i16> %val, i16 %rev, i32 0
  ret <8 x i16> %result
}

; v8i16: byte-swapped halfword inserted into element 7 (the last element).
define <8 x i16> @f2(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f2:
; CHECK: vlebrh %v24, 0(%r2), 7
; CHECK: br %r14
  %loaded = load i16, i16* %ptr
  %rev = call i16 @llvm.bswap.i16(i16 %loaded)
  %result = insertelement <8 x i16> %val, i16 %rev, i32 7
  ret <8 x i16> %result
}

; v8i16: highest in-range offset. Element 2047 of an i16 array sits at byte
; offset 4094, which is expected to appear directly as the displacement.
define <8 x i16> @f3(<8 x i16> %val, i16* %base) {
; CHECK-LABEL: f3:
; CHECK: vlebrh %v24, 4094(%r2), 5
; CHECK: br %r14
  %addr = getelementptr i16, i16* %base, i32 2047
  %loaded = load i16, i16* %addr
  %rev = call i16 @llvm.bswap.i16(i16 %loaded)
  %result = insertelement <8 x i16> %val, i16 %rev, i32 5
  ret <8 x i16> %result
}

; v8i16: first out-of-range offset. Byte offset 4096 is expected to be added
; to the base register with aghi, leaving a zero displacement on vlebrh.
define <8 x i16> @f4(<8 x i16> %val, i16* %base) {
; CHECK-LABEL: f4:
; CHECK: aghi %r2, 4096
; CHECK: vlebrh %v24, 0(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i16, i16* %base, i32 2048
  %loaded = load i16, i16* %addr
  %rev = call i16 @llvm.bswap.i16(i16 %loaded)
  %result = insertelement <8 x i16> %val, i16 %rev, i32 1
  ret <8 x i16> %result
}

; v8i16: insertion into a variable element.
; The element index is a run-time value here, so the directives below require
; that no vlebrh appears before the return.
define <8 x i16> @f5(<8 x i16> %val, i16* %ptr, i32 %index) {
; CHECK-LABEL: f5:
; CHECK-NOT: vlebrh
; CHECK: br %r14
  %loaded = load i16, i16* %ptr
  %rev = call i16 @llvm.bswap.i16(i16 %loaded)
  %result = insertelement <8 x i16> %val, i16 %rev, i32 %index
  ret <8 x i16> %result
}

; v8i16: insertion expressed with a pair of vector bswaps. The vector is
; swapped, the unswapped load is inserted, and the result is swapped back;
; the same vlebrh as in f1 is expected.
define <8 x i16> @f6(<8 x i16> %val, i16* %ptr) {
; CHECK-LABEL: f6:
; CHECK: vlebrh %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = load i16, i16* %ptr
  %pre = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
  %mid = insertelement <8 x i16> %pre, i16 %loaded, i32 0
  %result = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %mid)
  ret <8 x i16> %result
}

; v4i32: byte-swapped word inserted into element 0 (the first element).
define <4 x i32> @f7(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f7:
; CHECK: vlebrf %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = load i32, i32* %ptr
  %rev = call i32 @llvm.bswap.i32(i32 %loaded)
  %result = insertelement <4 x i32> %val, i32 %rev, i32 0
  ret <4 x i32> %result
}

; v4i32: byte-swapped word inserted into element 3 (the last element).
define <4 x i32> @f8(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f8:
; CHECK: vlebrf %v24, 0(%r2), 3
; CHECK: br %r14
  %loaded = load i32, i32* %ptr
  %rev = call i32 @llvm.bswap.i32(i32 %loaded)
  %result = insertelement <4 x i32> %val, i32 %rev, i32 3
  ret <4 x i32> %result
}

; v4i32: highest in-range offset. Element 1023 of an i32 array sits at byte
; offset 4092, which is expected to appear directly as the displacement.
define <4 x i32> @f9(<4 x i32> %val, i32* %base) {
; CHECK-LABEL: f9:
; CHECK: vlebrf %v24, 4092(%r2), 2
; CHECK: br %r14
  %addr = getelementptr i32, i32* %base, i32 1023
  %loaded = load i32, i32* %addr
  %rev = call i32 @llvm.bswap.i32(i32 %loaded)
  %result = insertelement <4 x i32> %val, i32 %rev, i32 2
  ret <4 x i32> %result
}

; v4i32: insertion with the first out-of-range offset.
; Byte offset 4096 (element 1024 of an i32 array) is expected to be added to
; the base register with aghi, leaving a zero displacement on vlebrf.
define <4 x i32> @f10(<4 x i32> %val, i32* %base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, 4096
; CHECK: vlebrf %v24, 0(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i32, i32* %base, i32 1024
  %loaded = load i32, i32* %addr
  %rev = call i32 @llvm.bswap.i32(i32 %loaded)
  %result = insertelement <4 x i32> %val, i32 %rev, i32 1
  ret <4 x i32> %result
}

; v4i32: insertion into a variable element. The index is a run-time value, so
; the directives below require that no vlebrf appears before the return.
define <4 x i32> @f11(<4 x i32> %val, i32* %ptr, i32 %index) {
; CHECK-LABEL: f11:
; CHECK-NOT: vlebrf
; CHECK: br %r14
  %loaded = load i32, i32* %ptr
  %rev = call i32 @llvm.bswap.i32(i32 %loaded)
  %result = insertelement <4 x i32> %val, i32 %rev, i32 %index
  ret <4 x i32> %result
}

; v4i32: insertion expressed with a pair of vector bswaps. The vector is
; swapped, the unswapped load is inserted, and the result is swapped back;
; the same vlebrf as in f7 is expected.
define <4 x i32> @f12(<4 x i32> %val, i32* %ptr) {
; CHECK-LABEL: f12:
; CHECK: vlebrf %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = load i32, i32* %ptr
  %pre = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
  %mid = insertelement <4 x i32> %pre, i32 %loaded, i32 0
  %result = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %mid)
  ret <4 x i32> %result
}

; v2i64: byte-swapped doubleword inserted into element 0 (the first element).
define <2 x i64> @f13(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f13:
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = load i64, i64* %ptr
  %rev = call i64 @llvm.bswap.i64(i64 %loaded)
  %result = insertelement <2 x i64> %val, i64 %rev, i32 0
  ret <2 x i64> %result
}

; v2i64: byte-swapped doubleword inserted into element 1 (the last element).
define <2 x i64> @f14(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f14:
; CHECK: vlebrg %v24, 0(%r2), 1
; CHECK: br %r14
  %loaded = load i64, i64* %ptr
  %rev = call i64 @llvm.bswap.i64(i64 %loaded)
  %result = insertelement <2 x i64> %val, i64 %rev, i32 1
  ret <2 x i64> %result
}

; v2i64: insertion with the highest in-range offset.
; Element 511 of an i64 array sits at byte offset 4088, which is expected to
; appear directly as the vlebrg displacement.
define <2 x i64> @f15(<2 x i64> %val, i64* %base) {
; CHECK-LABEL: f15:
; CHECK: vlebrg %v24, 4088(%r2), 1
; CHECK: br %r14
  %addr = getelementptr i64, i64* %base, i32 511
  %loaded = load i64, i64* %addr
  %rev = call i64 @llvm.bswap.i64(i64 %loaded)
  %result = insertelement <2 x i64> %val, i64 %rev, i32 1
  ret <2 x i64> %result
}

; v2i64: first out-of-range offset. Byte offset 4096 is expected to be added
; to the base register with aghi, leaving a zero displacement on vlebrg.
define <2 x i64> @f16(<2 x i64> %val, i64* %base) {
; CHECK-LABEL: f16:
; CHECK: aghi %r2, 4096
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %addr = getelementptr i64, i64* %base, i32 512
  %loaded = load i64, i64* %addr
  %rev = call i64 @llvm.bswap.i64(i64 %loaded)
  %result = insertelement <2 x i64> %val, i64 %rev, i32 0
  ret <2 x i64> %result
}

; v2i64: insertion into a variable element. The index is a run-time value, so
; the directives below require that no vlebrg appears before the return.
define <2 x i64> @f17(<2 x i64> %val, i64* %ptr, i32 %index) {
; CHECK-LABEL: f17:
; CHECK-NOT: vlebrg
; CHECK: br %r14
  %loaded = load i64, i64* %ptr
  %rev = call i64 @llvm.bswap.i64(i64 %loaded)
  %result = insertelement <2 x i64> %val, i64 %rev, i32 %index
  ret <2 x i64> %result
}

; v2i64: insertion expressed with a pair of vector bswaps. The vector is
; swapped, the unswapped load is inserted, and the result is swapped back;
; the same vlebrg as in f13 is expected.
define <2 x i64> @f18(<2 x i64> %val, i64* %ptr) {
; CHECK-LABEL: f18:
; CHECK: vlebrg %v24, 0(%r2), 0
; CHECK: br %r14
  %loaded = load i64, i64* %ptr
  %pre = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
  %mid = insertelement <2 x i64> %pre, i64 %loaded, i32 0
  %result = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %mid)
  ret <2 x i64> %result
}