; Test loads of element-reversed vectors: a full-width vector load whose
; result is fed to a shufflevector that reverses the element order should
; be selected as a single reversing load instruction.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s

; Test v16i8 loads.  Reversing sixteen one-byte elements is the same
; permutation as reversing the bytes of the whole 16-byte value, so the
; expected instruction here is vlbrq rather than one of the vler* forms
; used by the wider element types below.
define <16 x i8> @f1(<16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vlbrq %v24, 0(%r2)
; CHECK: br %r14
  %load = load <16 x i8>, <16 x i8> *%ptr
  %ret = shufflevector <16 x i8> %load, <16 x i8> undef,
                       <16 x i32> <i32 15, i32 14, i32 13, i32 12,
                                   i32 11, i32 10, i32 9, i32 8,
                                   i32 7, i32 6, i32 5, i32 4,
                                   i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %ret
}

; Test v8i16 loads.
define <8 x i16> @f2(<8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vlerh %v24, 0(%r2)
; CHECK: br %r14
  %load = load <8 x i16>, <8 x i16> *%ptr
  %ret = shufflevector <8 x i16> %load, <8 x i16> undef,
                       <8 x i32> <i32 7, i32 6, i32 5, i32 4,
                                  i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %ret
}

; Test v4i32 loads.
define <4 x i32> @f3(<4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %load = load <4 x i32>, <4 x i32> *%ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test v2i64 loads.
define <2 x i64> @f4(<2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vlerg %v24, 0(%r2)
; CHECK: br %r14
  %load = load <2 x i64>, <2 x i64> *%ptr
  %ret = shufflevector <2 x i64> %load, <2 x i64> undef,
                       <2 x i32> <i32 1, i32 0>
  ret <2 x i64> %ret
}

; Test v4f32 loads.  The same instruction as the v4i32 case is expected,
; since only the element width matters for the reversal.
define <4 x float> @f5(<4 x float> *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %load = load <4 x float>, <4 x float> *%ptr
  %ret = shufflevector <4 x float> %load, <4 x float> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %ret
}

; Test v2f64 loads.
define <2 x double> @f6(<2 x double> *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vlerg %v24, 0(%r2)
; CHECK: br %r14
  %load = load <2 x double>, <2 x double> *%ptr
  %ret = shufflevector <2 x double> %load, <2 x double> undef,
                       <2 x i32> <i32 1, i32 0>
  ret <2 x double> %ret
}

; The remaining tests exercise address selection for the reversing load.
; As the expected output below shows, byte displacements 0..4095 are folded
; into the instruction, anything outside that range is applied to the base
; register first, and a register index may be used.

; Test the highest aligned in-range offset (255 * 16 = 4080 bytes).
define <4 x i32> @f7(<4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: vlerf %v24, 4080(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
  %load = load <4 x i32>, <4 x i32> *%ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test the highest unaligned in-range offset.
define <4 x i32> @f8(i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vlerf %v24, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %load = load <4 x i32>, <4 x i32> *%ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test the next offset up (256 * 16 = 4096 bytes), which requires separate
; address logic.
define <4 x i32> @f9(<4 x i32> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
  %load = load <4 x i32>, <4 x i32> *%ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Test negative offsets, which also require separate address logic.
define <4 x i32> @f10(<4 x i32> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vlerf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
  %load = load <4 x i32>, <4 x i32> *%ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}

; Check that indexes are allowed.
define <4 x i32> @f11(i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vlerf %v24, 0(%r3,%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %load = load <4 x i32>, <4 x i32> *%ptr
  %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %ret
}