; Test replications of a byte-swapped scalar memory value.
;
; Each function loads one scalar, byte-swaps it and splats it across a vector
; (or splats first and byte-swaps the vector). The expected output is a single
; vlbrrep{h,f,g} instruction writing %v24.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; Test a v8i16 replicating load with no offset.
define <8 x i16> @f1(i16 *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK: br %r14
  %scalar = load i16, i16 *%ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  ; Insert into lane 0, then broadcast lane 0 with an all-zero shuffle mask.
  %val = insertelement <8 x i16> undef, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}

; Test a v8i16 replicating load with the maximum in-range offset.
; Element 2047 of an i16 array is byte offset 4094, which is expected to be
; used directly as the displacement.
define <8 x i16> @f2(i16 *%base) {
; CHECK-LABEL: f2:
; CHECK: vlbrreph %v24, 4094(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2047
  %scalar = load i16, i16 *%ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  %val = insertelement <8 x i16> undef, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}

; Test a v8i16 replicating load with the first out-of-range offset.
; Element 2048 of an i16 array is byte offset 4096, which is expected to be
; added to the base register separately, leaving a zero displacement.
define <8 x i16> @f3(i16 *%base) {
; CHECK-LABEL: f3:
; CHECK: aghi %r2, 4096
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i16, i16 *%base, i64 2048
  %scalar = load i16, i16 *%ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  %val = insertelement <8 x i16> undef, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}

; Test a v8i16 replicating load using a vector bswap.
define <8 x i16> @f4(i16 *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vlbrreph %v24, 0(%r2)
; CHECK: br %r14
  %scalar = load i16, i16 *%ptr
  ; Here the unswapped scalar is splatted first and the byte swap is applied
  ; to the whole vector; the same single instruction is expected.
  %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
  %rep = shufflevector <8 x i16> %val, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep)
  ret <8 x i16> %ret
}

; Test a v4i32 replicating load with no offset.
define <4 x i32> @f5(i32 *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK: br %r14
  %scalar = load i32, i32 *%ptr
  %swap = call i32 @llvm.bswap.i32(i32 %scalar)
  ; Insert into lane 0, then broadcast lane 0 with an all-zero shuffle mask.
  %val = insertelement <4 x i32> undef, i32 %swap, i32 0
  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
                       <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Test a v4i32 replicating load with the maximum in-range offset.
; Element 1023 of an i32 array is byte offset 4092, which is expected to be
; used directly as the displacement.
define <4 x i32> @f6(i32 *%base) {
; CHECK-LABEL: f6:
; CHECK: vlbrrepf %v24, 4092(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1023
  %scalar = load i32, i32 *%ptr
  %swap = call i32 @llvm.bswap.i32(i32 %scalar)
  %val = insertelement <4 x i32> undef, i32 %swap, i32 0
  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
                       <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Test a v4i32 replicating load with the first out-of-range offset.
; Element 1024 of an i32 array is byte offset 4096, which is expected to be
; added to the base register separately, leaving a zero displacement.
define <4 x i32> @f7(i32 *%base) {
; CHECK-LABEL: f7:
; CHECK: aghi %r2, 4096
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i32, i32 *%base, i64 1024
  %scalar = load i32, i32 *%ptr
  %swap = call i32 @llvm.bswap.i32(i32 %scalar)
  %val = insertelement <4 x i32> undef, i32 %swap, i32 0
  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
                       <4 x i32> zeroinitializer
  ret <4 x i32> %ret
}

; Test a v4i32 replicating load using a vector bswap.
define <4 x i32> @f8(i32 *%ptr) {
; CHECK-LABEL: f8:
; CHECK: vlbrrepf %v24, 0(%r2)
; CHECK: br %r14
  %scalar = load i32, i32 *%ptr
  ; Here the unswapped scalar is splatted first and the byte swap is applied
  ; to the whole vector; the same single instruction is expected.
  %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %rep = shufflevector <4 x i32> %val, <4 x i32> undef,
                       <4 x i32> zeroinitializer
  %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep)
  ret <4 x i32> %ret
}

; Test a v2i64 replicating load with no offset.
define <2 x i64> @f9(i64 *%ptr) {
; CHECK-LABEL: f9:
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK: br %r14
  %scalar = load i64, i64 *%ptr
  %swap = call i64 @llvm.bswap.i64(i64 %scalar)
  ; Insert into lane 0, then broadcast lane 0 with an all-zero shuffle mask.
  %val = insertelement <2 x i64> undef, i64 %swap, i32 0
  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
                       <2 x i32> zeroinitializer
  ret <2 x i64> %ret
}

; Test a v2i64 replicating load with the maximum in-range offset.
; Element 511 of an i64 array is byte offset 4088, which is expected to be
; used directly as the displacement.
define <2 x i64> @f10(i64 *%base) {
; CHECK-LABEL: f10:
; CHECK: vlbrrepg %v24, 4088(%r2)
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i64 511
  %scalar = load i64, i64 *%ptr
  %swap = call i64 @llvm.bswap.i64(i64 %scalar)
  %val = insertelement <2 x i64> undef, i64 %swap, i32 0
  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
                       <2 x i32> zeroinitializer
  ret <2 x i64> %ret
}

; Test a v2i64 replicating load with the first out-of-range offset.
; Element 512 of an i64 array is byte offset 4096, which is expected to be
; added to the base register separately, leaving a zero displacement.
define <2 x i64> @f11(i64 *%base) {
; CHECK-LABEL: f11:
; CHECK: aghi %r2, 4096
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr i64, i64 *%base, i64 512
  %scalar = load i64, i64 *%ptr
  %swap = call i64 @llvm.bswap.i64(i64 %scalar)
  %val = insertelement <2 x i64> undef, i64 %swap, i32 0
  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
                       <2 x i32> zeroinitializer
  ret <2 x i64> %ret
}

; Test a v2i64 replicating load using a vector bswap.
define <2 x i64> @f12(i64 *%ptr) {
; CHECK-LABEL: f12:
; CHECK: vlbrrepg %v24, 0(%r2)
; CHECK: br %r14
  %scalar = load i64, i64 *%ptr
  ; Here the unswapped scalar is splatted first and the byte swap is applied
  ; to the whole vector; the same single instruction is expected.
  %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
  %rep = shufflevector <2 x i64> %val, <2 x i64> undef,
                       <2 x i32> zeroinitializer
  %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep)
  ret <2 x i64> %ret
}

; Test a v8i16 replicating load with an index.
; The variable index is expected to be scaled to bytes by a left shift of 1
; and used as the index register, while the constant element offset 1023
; (byte offset 2046) is expected to be folded into the displacement.
define <8 x i16> @f13(i16 *%base, i64 %index) {
; CHECK-LABEL: f13:
; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
; CHECK: vlbrreph %v24, 2046([[REG]],%r2)
; CHECK: br %r14
  %ptr1 = getelementptr i16, i16 *%base, i64 %index
  %ptr = getelementptr i16, i16 *%ptr1, i64 1023
  %scalar = load i16, i16 *%ptr
  %swap = call i16 @llvm.bswap.i16(i16 %scalar)
  %val = insertelement <8 x i16> undef, i16 %swap, i32 0
  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
                       <8 x i32> zeroinitializer
  ret <8 x i16> %ret
}