; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P9
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P8

; Every function in this file loads a value from memory and replicates it
; across a vector. @test..@test4 do so with insertelement followed by a
; zero-mask shufflevector; the remaining functions use a shufflevector whose
; mask repeats one 4-byte or 8-byte group of a loaded byte vector. The check
; lines record the assembly expected for the pwr9/powerpc64 and
; pwr8/powerpc64le invocations above.

; Splat of the double at %a[3] into <2 x double>, stored to %c.
; Both configurations expect the address to be advanced by 24 bytes and the
; value to be loaded with lxvdsx.
define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds double, double* %a, i64 3
  %0 = load double, double* %arrayidx, align 8
  %splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
  %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
  store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
  ret void
}

; Splat of the float at %a[3] into <4 x float>, stored to %c.
; The pwr9 checks expect a single lxvwsx after a 12-byte address adjustment;
; the pwr8 checks expect lfiwzx followed by xxspltw.
define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxspltw v2, vs0, 1
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds float, float* %a, i64 3
  %0 = load float, float* %arrayidx, align 4
  %splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
  %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
  ret void
}

; Integer counterpart of @test2: splat of the i32 at %a[3] into <4 x i32>,
; stored to %c. The expected instruction sequences match those of @test2.
define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxspltw v2, vs0, 1
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 3
  %0 = load i32, i32* %arrayidx, align 4
  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
  ret void
}

; Integer counterpart of @test: splat of the i64 at %a[3] into <2 x i64>,
; stored to %c. The expected instruction sequences match those of @test.
define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test4:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test4:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i64, i64* %a, i64 3
  %0 = load i64, i64* %arrayidx, align 8
  %splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
  %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
  ret void
}

; Loads <4 x i8> through %s and repeats bytes 0-3 four times to form the
; <16 x i8> result. The pwr9 checks expect lxvwsx directly on the incoming
; pointer with no address adjustment. %t is unused.
define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
; P9-LABEL: unadjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfiwzx f0, 0, r3
; P8-NEXT:    xxspltw v2, vs0, 1
; P8-NEXT:    blr
entry:
  %0 = bitcast i32* %s to <4 x i8>*
  %1 = load <4 x i8>, <4 x i8>* %0, align 4
  %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %2
}

; Loads <8 x i8> through %s and repeats bytes 4-7 four times. The pwr9 checks
; expect the pointer to be advanced by 4 before lxvwsx; the pwr8 checks expect
; an ld / mtfprd / xxspltw sequence instead. %t is unused.
define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
; P9-LABEL: adjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    ld r3, 0(r3)
; P8-NEXT:    mtfprd f0, r3
; P8-NEXT:    xxspltw v2, vs0, 0
; P8-NEXT:    blr
entry:
  %0 = bitcast i64* %s to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 8
  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %2
}

; Loads a full <16 x i8> from %s and repeats bytes 0-3 four times. The pwr9
; checks expect lxvwsx with no address adjustment; the pwr8 checks expect
; lvx followed by xxspltw with word index 3. %t is unused.
define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 3
; P8-NEXT:    blr
entry:
  %0 = load <16 x i8>, <16 x i8>* %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %1
}

; Loads a full <16 x i8> from %s and repeats bytes 4-7 four times. The pwr9
; checks expect a 4-byte address adjustment before lxvwsx; the pwr8 checks
; expect lvx followed by xxspltw with word index 2. %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 2
; P8-NEXT:    blr
entry:
  %0 = load <16 x i8>, <16 x i8>* %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %1
}

; Loads a full <16 x i8> from %s and repeats bytes 8-11 four times. The pwr9
; checks expect an 8-byte address adjustment before lxvwsx; the pwr8 checks
; expect lvx followed by xxspltw with word index 1. %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 1
; P8-NEXT:    blr
entry:
  %0 = load <16 x i8>, <16 x i8>* %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
  ret <16 x i8> %1
}

; Loads a full <16 x i8> from %s and repeats bytes 12-15 four times. The pwr9
; checks expect a 12-byte address adjustment before lxvwsx; the pwr8 checks
; expect lvx followed by xxspltw with word index 0. %t is unused.
define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 12
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 0
; P8-NEXT:    blr
entry:
  %0 = load <16 x i8>, <16 x i8>* %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %1
}

; Loads <8 x i8> through %s and repeats bytes 0-7 twice. Both configurations
; expect a single lxvdsx on the incoming pointer. %t is unused.
define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
; P9-LABEL: unadjusted_lxvdsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %0 = bitcast i64* %s to <8 x i8>*
  %1 = load <8 x i8>, <8 x i8>* %0, align 8
  %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %2
}

; Loads a full <16 x i8> from %s and repeats bytes 0-7 twice. Both
; configurations expect a single lxvdsx on the incoming pointer. %t is unused.
define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %0 = load <16 x i8>, <16 x i8>* %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %1
}

; Loads a full <16 x i8> from %s and repeats bytes 8-15 twice. Both
; configurations expect the pointer to be advanced by 8 before lxvdsx.
; %t is unused.
define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r3, r3, 8
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
entry:
  %0 = load <16 x i8>, <16 x i8>* %s, align 16
  %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %1
}