; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -arm-atomic-cfg-tidy=0 -float-abi=hard -mcpu=cortex-a9 -O3 | FileCheck %s

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32-S64"

; This test used to test vector spilling using vstmia/vldmia instructions, but
; the changes for PR:18825 prevent that spilling.

; VST1 and VLD1 are now used for spilling/restoring.
;
; TODO:
; I think more vldm should be generated, initial ones are used to load some
; elements and then a sequence of vldr are used:
; vldr d15, [r1, #104]
; vldr d13, [r2, #96]
; vldr d9, [r1, #120]
; vldr d11, [r2, #112]
; vldr d14, [r1, #96]
; vldr d12, [r2, #88]
; vldr d8, [r1, #112]
; vldr d10, [r2, #104]

; Also this patterns repeats several times which certainly seems like a vld1.64
; should be used to load the data:
; vldr d16, [r1, #16]
; vldr d17, [r1, #24]
; vst1.64 {d16, d17}, [lr:128] @ 16-byte Spill

; CHECK: test:
; CHECK: vldmia r{{.*}}, {d{{.*}}, d{{.*}}}
; CHECK: vldmia r{{.*}}, {d{{.*}}, d{{.*}}}

; Load 16 i64 elements from each of %src0 and %src1, keep all 32 values live
; across an opaque call (forcing spills/reloads), then build two <16 x i64>
; vectors and add them.
define <16 x i64> @test(i64* %src0, i64* %src1) #0 {
entry:
  ; Load elements 0..15 from %src0.
  %addr.0 = getelementptr inbounds i64, i64* %src0, i32 0
  %el.0 = load i64, i64* %addr.0, align 8
  %addr.1 = getelementptr inbounds i64, i64* %src0, i32 1
  %el.1 = load i64, i64* %addr.1, align 8
  %addr.2 = getelementptr inbounds i64, i64* %src0, i32 2
  %el.2 = load i64, i64* %addr.2, align 8
  %addr.3 = getelementptr inbounds i64, i64* %src0, i32 3
  %el.3 = load i64, i64* %addr.3, align 8
  %addr.4 = getelementptr inbounds i64, i64* %src0, i32 4
  %el.4 = load i64, i64* %addr.4, align 8
  %addr.5 = getelementptr inbounds i64, i64* %src0, i32 5
  %el.5 = load i64, i64* %addr.5, align 8
  %addr.6 = getelementptr inbounds i64, i64* %src0, i32 6
  %el.6 = load i64, i64* %addr.6, align 8
  %addr.7 = getelementptr inbounds i64, i64* %src0, i32 7
  %el.7 = load i64, i64* %addr.7, align 8
  %addr.8 = getelementptr inbounds i64, i64* %src0, i32 8
  %el.8 = load i64, i64* %addr.8, align 8
  %addr.9 = getelementptr inbounds i64, i64* %src0, i32 9
  %el.9 = load i64, i64* %addr.9, align 8
  %addr.10 = getelementptr inbounds i64, i64* %src0, i32 10
  %el.10 = load i64, i64* %addr.10, align 8
  %addr.11 = getelementptr inbounds i64, i64* %src0, i32 11
  %el.11 = load i64, i64* %addr.11, align 8
  %addr.12 = getelementptr inbounds i64, i64* %src0, i32 12
  %el.12 = load i64, i64* %addr.12, align 8
  %addr.13 = getelementptr inbounds i64, i64* %src0, i32 13
  %el.13 = load i64, i64* %addr.13, align 8
  %addr.14 = getelementptr inbounds i64, i64* %src0, i32 14
  %el.14 = load i64, i64* %addr.14, align 8
  %addr.15 = getelementptr inbounds i64, i64* %src0, i32 15
  %el.15 = load i64, i64* %addr.15, align 8

  ; Load elements 0..15 from %src1.
  %addr.0.1 = getelementptr inbounds i64, i64* %src1, i32 0
  %el.0.1 = load i64, i64* %addr.0.1, align 8
  %addr.1.1 = getelementptr inbounds i64, i64* %src1, i32 1
  %el.1.1 = load i64, i64* %addr.1.1, align 8
  %addr.2.1 = getelementptr inbounds i64, i64* %src1, i32 2
  %el.2.1 = load i64, i64* %addr.2.1, align 8
  %addr.3.1 = getelementptr inbounds i64, i64* %src1, i32 3
  %el.3.1 = load i64, i64* %addr.3.1, align 8
  %addr.4.1 = getelementptr inbounds i64, i64* %src1, i32 4
  %el.4.1 = load i64, i64* %addr.4.1, align 8
  %addr.5.1 = getelementptr inbounds i64, i64* %src1, i32 5
  %el.5.1 = load i64, i64* %addr.5.1, align 8
  %addr.6.1 = getelementptr inbounds i64, i64* %src1, i32 6
  %el.6.1 = load i64, i64* %addr.6.1, align 8
  %addr.7.1 = getelementptr inbounds i64, i64* %src1, i32 7
  %el.7.1 = load i64, i64* %addr.7.1, align 8
  %addr.8.1 = getelementptr inbounds i64, i64* %src1, i32 8
  %el.8.1 = load i64, i64* %addr.8.1, align 8
  %addr.9.1 = getelementptr inbounds i64, i64* %src1, i32 9
  %el.9.1 = load i64, i64* %addr.9.1, align 8
  %addr.10.1 = getelementptr inbounds i64, i64* %src1, i32 10
  %el.10.1 = load i64, i64* %addr.10.1, align 8
  %addr.11.1 = getelementptr inbounds i64, i64* %src1, i32 11
  %el.11.1 = load i64, i64* %addr.11.1, align 8
  %addr.12.1 = getelementptr inbounds i64, i64* %src1, i32 12
  %el.12.1 = load i64, i64* %addr.12.1, align 8
  %addr.13.1 = getelementptr inbounds i64, i64* %src1, i32 13
  %el.13.1 = load i64, i64* %addr.13.1, align 8
  %addr.14.1 = getelementptr inbounds i64, i64* %src1, i32 14
  %el.14.1 = load i64, i64* %addr.14.1, align 8
  %addr.15.1 = getelementptr inbounds i64, i64* %src1, i32 15
  %el.15.1 = load i64, i64* %addr.15.1, align 8

  ; Build the first vector: lane N gets %el.N.
  %vec.0 = insertelement <16 x i64> undef, i64 %el.0, i32 0
  %vec.1 = insertelement <16 x i64> %vec.0, i64 %el.1, i32 1
  %vec.2 = insertelement <16 x i64> %vec.1, i64 %el.2, i32 2
  %vec.3 = insertelement <16 x i64> %vec.2, i64 %el.3, i32 3
  %vec.4 = insertelement <16 x i64> %vec.3, i64 %el.4, i32 4
  %vec.5 = insertelement <16 x i64> %vec.4, i64 %el.5, i32 5
  %vec.6 = insertelement <16 x i64> %vec.5, i64 %el.6, i32 6
  %vec.7 = insertelement <16 x i64> %vec.6, i64 %el.7, i32 7
  %vec.8 = insertelement <16 x i64> %vec.7, i64 %el.8, i32 8
  %vec.9 = insertelement <16 x i64> %vec.8, i64 %el.9, i32 9
  %vec.10 = insertelement <16 x i64> %vec.9, i64 %el.10, i32 10
  %vec.11 = insertelement <16 x i64> %vec.10, i64 %el.11, i32 11
  %vec.12 = insertelement <16 x i64> %vec.11, i64 %el.12, i32 12
  %vec.13 = insertelement <16 x i64> %vec.12, i64 %el.13, i32 13
  %vec.14 = insertelement <16 x i64> %vec.13, i64 %el.14, i32 14
  %vec.15 = insertelement <16 x i64> %vec.14, i64 %el.15, i32 15
  ; Opaque call: all loaded values must survive it, forcing spills.
  call void @capture(i64* %src0, i64* %src1)
  ; Build the second vector: lane N gets %el.N.1 (fixed an off-by-one that
  ; inserted %el.7.1 at lane 8 and shifted lanes 9-15, leaving %el.15.1 dead).
  %vec.0.1 = insertelement <16 x i64> undef, i64 %el.0.1, i32 0
  %vec.1.1 = insertelement <16 x i64> %vec.0.1, i64 %el.1.1, i32 1
  %vec.2.1 = insertelement <16 x i64> %vec.1.1, i64 %el.2.1, i32 2
  %vec.3.1 = insertelement <16 x i64> %vec.2.1, i64 %el.3.1, i32 3
  %vec.4.1 = insertelement <16 x i64> %vec.3.1, i64 %el.4.1, i32 4
  %vec.5.1 = insertelement <16 x i64> %vec.4.1, i64 %el.5.1, i32 5
  %vec.6.1 = insertelement <16 x i64> %vec.5.1, i64 %el.6.1, i32 6
  %vec.7.1 = insertelement <16 x i64> %vec.6.1, i64 %el.7.1, i32 7
  %vec.8.1 = insertelement <16 x i64> %vec.7.1, i64 %el.8.1, i32 8
  %vec.9.1 = insertelement <16 x i64> %vec.8.1, i64 %el.9.1, i32 9
  %vec.10.1 = insertelement <16 x i64> %vec.9.1, i64 %el.10.1, i32 10
  %vec.11.1 = insertelement <16 x i64> %vec.10.1, i64 %el.11.1, i32 11
  %vec.12.1 = insertelement <16 x i64> %vec.11.1, i64 %el.12.1, i32 12
  %vec.13.1 = insertelement <16 x i64> %vec.12.1, i64 %el.13.1, i32 13
  %vec.14.1 = insertelement <16 x i64> %vec.13.1, i64 %el.14.1, i32 14
  %vec.15.1 = insertelement <16 x i64> %vec.14.1, i64 %el.15.1, i32 15
  %res = add <16 x i64> %vec.15, %vec.15.1
  ret <16 x i64> %res
}

declare void @capture(i64*, i64*)

attributes #0 = { noredzone "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }