/external/valgrind/none/tests/amd64/ |
D | sse4-64.stdout.exp-older-glibc | 2005 r insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2006 m insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2007 r insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2008 m insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2009 r insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2010 m insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2011 r insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2012 m insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2013 r insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004… 2014 m insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004… [all …]
|
D | sse4-64.stdout.exp | 2005 r insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2006 m insertps $0 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2007 r insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2008 m insertps $1 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266664… 2009 r insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2010 m insertps $2 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2011 r insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2012 m insertps $3 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29416266660… 2013 r insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004… 2014 m insertps $4 410e6666c0d66666c059999a3f99999a c1815c29416266664142147bc121c28f c1815c29000000004… [all …]
|
/external/llvm/test/CodeGen/X86/ |
D | avx.ll | 28 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone 38 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 48) 42 ;; Use a non-zero CountS for insertps 52 %2 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %1, i32 96) 67 %3 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %2, i32 192) 77 ; CHECK: insertps $48 85 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48) 94 ; CHECK: insertps $48 102 %7 = tail call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %6, i32 48) 114 ; CHECK: insertps $48 [all …]
|
D | sse41.ll | 144 ; X32-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3] 149 ; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,xmm0[3] 151 …%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 21) nounwi… 155 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone 157 ; When optimizing for speed, prefer blendps over insertps even if it means we have to 174 ; When optimizing for size, generate an insertps if there's a load fold opportunity. 176 ; generate an insertps for X32 but not for X64! 180 ; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3] 192 ; is always just a blendps because blendps is never more expensive than insertps. 267 ; This used to compile to insertps $0 + insertps $16. insertps $0 is always [all …]
|
D | insertps-from-constantpool.ll | 4 ; Test for case where insertps folds the load of an insertion element from a constant pool. 9 ; X32-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3] 14 ; X64-NEXT: insertps {{.*#+}} xmm0 = mem[0],xmm0[1,2,3] 16 …%1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> <float 0.0, float 1.0, … 20 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
D | insertps-O0-bug.ll | 12 ; of an insertps is considered to be profitable. 24 ; always considered unprofitable at -O0. This would leave the insertps mask 33 ; This test checks that the vector load in input to the insertps is not 40 ; CHECK: insertps $64, [[REG]], 44 %1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a, <4 x float> %0, i32 64) 52 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
|
D | insertps-combine.ll | 9 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[2] 27 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[0] 45 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0] 63 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm0[0] 80 ; SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm1[2],zero 99 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],zero,zero 107 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %res0, <4 x float> %a1, i8 21) 125 %res1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %res0, i8 21) 135 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3] 159 ; SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2,2,3] [all …]
|
D | insertps-unfold-load-bug.ll | 4 ; Test for case where insertps was folding the load of the insertion element, but a later optimizat… 14 ; X32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 22 ; X64-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
D | vec_set-3.ll | 7 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,mem[0],zero,zero 30 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
|
D | merge-consecutive-loads-128.ll | 219 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 232 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 260 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 273 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 710 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] 711 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 712 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] 727 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] 728 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 729 ; X32-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
D | fold-load-vec.ll | 9 ; CHECK: insertps
|
D | sse41-intrinsics-x86-upgrade.ll | 54 ; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] 56 …%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i32 17) ; <<4 x… 59 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone
|
D | sse41-intrinsics-x86.ll | 78 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] 85 …%res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x … 88 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
D | sse41-intrinsics-fast-isel.ll | 581 ; X32-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3] 586 ; X64-NEXT: insertps {{.*#+}} xmm0 = xmm1[0],xmm0[1],zero,xmm0[3] 588 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 4) 591 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
D | vector-shuffle-128-v4.ll | 675 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero 713 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero 748 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0] 823 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[2],zero,zero 1067 ; SSE41-NEXT: insertps {{.*#+}} xmm1 = zero,zero,zero,xmm1[0] 1111 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,zero,xmm1[2] 1156 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],zero,xmm0[2],xmm1[0]
|
D | stack-folding-fp-sse42.ll | 660 ;CHECK: insertps $17, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 663 %2 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 209) 666 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
|
D | vector-shuffle-variable-128.ll | 101 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] 102 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] 103 ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
|
/external/llvm/test/Transforms/InstCombine/ |
D | x86-insertps.ll | 3 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone 8 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c) 12 ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c) 19 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15) 26 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255) 36 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12) 40 ; CHECK-NEXT: call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12) 47 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21) 58 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26) 69 %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193) [all …]
|
/external/swiftshader/third_party/LLVM/test/CodeGen/X86/ |
D | sse41.ll | 154 …%tmp1 = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %t1, <4 x float> %t2, i32 1) nounwin… 157 ; X32: insertps $1, %xmm1, %xmm0 160 ; X64: insertps $1, %xmm1, %xmm0 163 declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32) nounwind readnone 169 ; X32: insertps $0, 4(%esp), %xmm0 172 ; X64: insertps $0, %xmm1, %xmm0 180 ; X32: insertps $0, %xmm1, %xmm0 183 ; X64: insertps $0, %xmm1, %xmm0 227 ; This used to compile to insertps $0 + insertps $16. insertps $0 is always 241 ; X32-NOT: insertps $0 [all …]
|
/external/swiftshader/third_party/LLVM/lib/Target/X86/ |
D | README-SSE.txt | 608 In sse4 mode, we could use insertps to make both better. 610 Here's another testcase that could use insertps [mem]: 621 insertps $0x10, x2(%rip), %xmm0 622 insertps $0x10, x3(%rip), %xmm1 911 insertps $0, %xmm2, %xmm2 912 insertps $16, %xmm3, %xmm2 913 insertps $0, %xmm0, %xmm3 914 insertps $16, %xmm1, %xmm3 922 The insertps's of $0 are pointless complex copies.
|
/external/llvm/lib/Target/X86/ |
D | README-SSE.txt | 542 In sse4 mode, we could use insertps to make both better. 544 Here's another testcase that could use insertps [mem]: 555 insertps $0x10, x2(%rip), %xmm0 556 insertps $0x10, x3(%rip), %xmm1
|
/external/swiftshader/third_party/LLVM/test/MC/Disassembler/X86/ |
D | x86-32.txt | 160 # CHECK: insertps $129, %xmm2, %xmm1
|
D | simple-tests.txt | 129 # CHECK: insertps $129, %xmm2, %xmm1
|
/external/elfutils/libcpu/ |
D | ChangeLog | 79 * defs/i386: Add dppd, dpps, insertps, movntdqa, mpsadbw, packusdw,
|
/external/llvm/test/MC/Disassembler/X86/ |
D | x86-32.txt | 202 # CHECK: insertps $129, %xmm2, %xmm1
|