; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
;
; Test that only one vperm of the vector compare is needed for both extracts.

define void @fun() {
; The two extractelement instructions below read lanes 0 and 1 of the same
; vector compare result (%tmp1) from two different basic blocks.  The
; FileCheck directives accept the first vperm emitted for this function and
; reject any further vperm after it.
; CHECK-LABEL: fun:
; CHECK: vperm
; CHECK-NOT: vperm
bb:
  %tmp = load <4 x i8>, <4 x i8>* undef
  %tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
  ; First use of the compare result: lane 0.
  %tmp2 = extractelement <4 x i1> %tmp1, i32 0
  br i1 %tmp2, label %bb1, label %bb2

bb1:
  unreachable

bb2:
  ; Second use of the same compare result, in another block: lane 1.
  %tmp3 = extractelement <4 x i1> %tmp1, i32 1
  br i1 %tmp3, label %bb3, label %bb4

bb3:
  unreachable

bb4:
  unreachable
}

; Test that a zero index in the permute vector is used instead of VGBM, with
; a zero index into the other source operand.
define <4 x i8> @fun1(<2 x i8> %arg) {
; The shuffle mask <1, 2, 0, 3> indexes the concatenation of %arg
; (elements 0-1) and the zero vector (elements 2-3).
;
; Expected constant-pool permute mask: bytes 1 and 0 pick elements of %arg,
; while 18 (0x12 = 16 + 2) picks byte 2 of the second vperm source.  Only the
; first four bytes are defined; the remaining twelve are emitted as padding.
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT:        .byte   1                       # 0x1
; CHECK-NEXT:        .byte   18                      # 0x12
; CHECK-NEXT:        .byte   0                       # 0x0
; CHECK-NEXT:        .byte   18                      # 0x12
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .text
; CHECK-NEXT:        .globl  fun1
; CHECK-NEXT:        .p2align        4
; CHECK-NEXT:        .type   fun1,@function
; CHECK-NEXT: fun1:                                  # @fun1
; CHECK-NEXT:        .cfi_startproc
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT:        larl    %r1, .LCPI1_0
; CHECK-NEXT:        vl      %v0, 0(%r1), 3
;
; vperm operands are: result, first source, second source, mask.  Here the
; mask register %v0 doubles as the second source, so index 18 reads byte 2 of
; the mask itself, which is 0; no separate zero vector is materialized.
; CHECK-NEXT:        vperm   %v24, %v24, %v0, %v0
; CHECK-NEXT:        br      %r14
   %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
                        <4 x i32> <i32 1, i32 2, i32 0, i32 3>
   ret <4 x i8> %res
}

; Same, but with the first byte indexing into an element of the zero vector.
define <4 x i8> @fun2(<2 x i8> %arg) {
; The shuffle mask <3, 1, 1, 2> indexes the concatenation of %arg
; (elements 0-1) and the zero vector (elements 2-3), so result elements 0 and
; 3 are zero and elements 1 and 2 are both %arg element 1.
;
; Expected constant-pool permute mask: 0 picks byte 0 of the first vperm
; source and 17 (0x11 = 16 + 1) picks byte 1 of the second source.  Only the
; first four bytes are defined; the remaining twelve are emitted as padding.
; CHECK-LABEL: .LCPI2_0:
; CHECK-NEXT:        .byte   0                       # 0x0
; CHECK-NEXT:        .byte   17                      # 0x11
; CHECK-NEXT:        .byte   17                      # 0x11
; CHECK-NEXT:        .byte   0                       # 0x0
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .space  1
; CHECK-NEXT:        .text
; CHECK-NEXT:        .globl  fun2
; CHECK-NEXT:        .p2align        4
; CHECK-NEXT:        .type   fun2,@function
; CHECK-NEXT: fun2:                                  # @fun2
; CHECK-NEXT:        .cfi_startproc
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT:        larl    %r1, .LCPI2_0
; CHECK-NEXT:        vl      %v0, 0(%r1), 3
;
; vperm operands are: result, first source, second source, mask.  Here the
; mask register %v0 doubles as the first source, so index 0 reads byte 0 of
; the mask itself, which is 0; %arg (in %v24) is the second source.
; CHECK-NEXT:        vperm   %v24, %v0, %v24, %v0
; CHECK-NEXT:        br      %r14
   %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
                        <4 x i32> <i32 3, i32 1, i32 1, i32 2>
   ret <4 x i8> %res
}