1; Test the a=b*b lowering sequence which can use a single temporary register
2; instead of two registers.
3
4; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
5; RUN:   --target x8632 -i %s --args -O2 -mattr=sse4.1 \
6; RUN:   | %if --need=target_X8632 --command FileCheck %s
7
8; RUN: %if --need=target_X8632 --command %p2i --filetype=obj --disassemble \
9; RUN:   --target x8632 -i %s --args -Om1 -mattr=sse4.1 \
10; RUN:   | %if --need=target_X8632 --command FileCheck %s
11
12; RUN: %if --need=target_MIPS32 --need=allow_dump \
13; RUN:   --command %p2i --filetype=asm --assemble --disassemble --target \
14; RUN:   mips32 -i %s --args -O2 -allow-externally-defined-symbols \
15; RUN:   | %if --need=target_MIPS32 --need=allow_dump \
16; RUN:   --command FileCheck --check-prefix MIPS32 %s
17
; Square_float: returns %a * %a, computed by one fmul whose two operands are
; the same float value.
; The x8632 expectation after this function requires a mulss that names the
; same xmm register for both operands; the mips32 expectations look for a
; mov.s followed by a mul.s.
18define internal float @Square_float(float %a) {
19entry:
20  %result = fmul float %a, %a
21  ret float %result
22}
23; CHECK-LABEL: Square_float
24; CHECK: mulss [[REG:xmm.]],[[REG]]
25; MIPS32-LABEL: Square_float
26; MIPS32: 	mov.s
27; MIPS32: 	mul.s
28
; Square_double: returns %a * %a, computed by one fmul whose two operands are
; the same double value.
; The x8632 expectation after this function requires a mulsd that names the
; same xmm register for both operands; the mips32 expectations look for a
; mov.d followed by a mul.d.
29define internal double @Square_double(double %a) {
30entry:
31  %result = fmul double %a, %a
32  ret double %result
33}
34; CHECK-LABEL: Square_double
35; CHECK: mulsd [[REG:xmm.]],[[REG]]
36; MIPS32-LABEL: Square_double
37; MIPS32: 	mov.d
38; MIPS32: 	mul.d
39
; Square_i32: returns %a * %a, computed by one 32-bit integer mul whose two
; operands are the same value.
; The x8632 expectation after this function requires an imul that names the
; same e-prefixed register for both operands; the mips32 expectations look
; for a move followed by a mul.
40define internal i32 @Square_i32(i32 %a) {
41entry:
42  %result = mul i32 %a, %a
43  ret i32 %result
44}
45; CHECK-LABEL: Square_i32
46; CHECK: imul [[REG:e..]],[[REG]]
47; MIPS32-LABEL: Square_i32
48; MIPS32: 	move
49; MIPS32: 	mul
50
; Square_i16: squares the low 16 bits of %a. The argument is truncated to
; i16, multiplied by itself at i16 width, and the product is sign-extended
; back to i32 for the return value.
; NOTE(review): the i32 parameter/return is presumably there because the
; input format does not allow narrower argument types - TODO confirm.
; The x8632 expectation after this function requires an imul whose two
; operands are the same two-character register name; the mips32 expectations
; look for move, mul, sll and sra, in that order.
51define internal i32 @Square_i16(i32 %a) {
52entry:
53  %a.16 = trunc i32 %a to i16
54  %result = mul i16 %a.16, %a.16
55  %result.i32 = sext i16 %result to i32
56  ret i32 %result.i32
57}
58; CHECK-LABEL: Square_i16
59; CHECK: imul [[REG:..]],[[REG]]
60; MIPS32-LABEL: Square_i16
61; MIPS32: 	move
62; MIPS32: 	mul
63; MIPS32: 	sll
64; MIPS32: 	sra
65
; Square_i8: squares the low 8 bits of %a. The argument is truncated to i8,
; multiplied by itself at i8 width, and the product is sign-extended back to
; i32 for the return value.
; NOTE(review): the i32 parameter/return is presumably there because the
; input format does not allow narrower argument types - TODO confirm.
; The x8632 expectation after this function requires the one-operand form
; "imul al"; the mips32 expectations look for move, mul, sll and sra, in
; that order.
66define internal i32 @Square_i8(i32 %a) {
67entry:
68  %a.8 = trunc i32 %a to i8
69  %result = mul i8 %a.8, %a.8
70  %result.i32 = sext i8 %result to i32
71  ret i32 %result.i32
72}
73; CHECK-LABEL: Square_i8
74; CHECK: imul al
75; MIPS32-LABEL: Square_i8
76; MIPS32: 	move
77; MIPS32: 	mul
78; MIPS32: 	sll
79; MIPS32: 	sra
80
; Square_v4f32: returns the element-wise product of the <4 x float> argument
; with itself, computed by one vector fmul.
; The x8632 expectation after this function requires a mulps that names the
; same xmm register for both operands. No mips32 expectations follow this
; function.
81define internal <4 x float> @Square_v4f32(<4 x float> %a) {
82entry:
83  %result = fmul <4 x float> %a, %a
84  ret <4 x float> %result
85}
86; CHECK-LABEL: Square_v4f32
87; CHECK: mulps [[REG:xmm.]],[[REG]]
88
; Square_v4i32: returns the element-wise product of the <4 x i32> argument
; with itself, computed by one vector mul.
; The x8632 expectation after this function requires a pmulld that names the
; same xmm register for both operands (the x8632 runs at the top of the file
; pass -mattr=sse4.1). No mips32 expectations follow this function.
89define internal <4 x i32> @Square_v4i32(<4 x i32> %a) {
90entry:
91  %result = mul <4 x i32> %a, %a
92  ret <4 x i32> %result
93}
94; CHECK-LABEL: Square_v4i32
95; CHECK: pmulld [[REG:xmm.]],[[REG]]
96
; Square_v8i16: returns the element-wise product of the <8 x i16> argument
; with itself, computed by one vector mul.
; The x8632 expectation after this function requires a pmullw that names the
; same xmm register for both operands. No mips32 expectations follow this
; function.
97define internal <8 x i16> @Square_v8i16(<8 x i16> %a) {
98entry:
99  %result = mul <8 x i16> %a, %a
100  ret <8 x i16> %result
101}
102; CHECK-LABEL: Square_v8i16
103; CHECK: pmullw [[REG:xmm.]],[[REG]]
104
; Square_v16i8: returns the element-wise product of the <16 x i8> argument
; with itself, computed by one vector mul.
; Unlike the other vector cases, the x8632 expectation after this function is
; negative: no "pmul" text may appear in the output for this function.
; NOTE(review): presumably the <16 x i8> multiply is lowered without a packed
; multiply instruction (e.g. via a helper or scalarization) - TODO confirm.
; No mips32 expectations follow this function.
105define internal <16 x i8> @Square_v16i8(<16 x i8> %a) {
106entry:
107  %result = mul <16 x i8> %a, %a
108  ret <16 x i8> %result
109}
110; CHECK-LABEL: Square_v16i8
111; CHECK-NOT: pmul
112