; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -mattr=+sse < %s | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt -S -slp-vectorizer -mattr=+avx512f < %s | FileCheck %s --check-prefixes=CHECK,AVX512

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

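; A single scalar icmp/select/trunc chain: there is nothing to vectorize here,
; so both targets must leave the function unchanged.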
; Function Attrs: norecurse nounwind readnone uwtable
define zeroext i8 @foo(i32 %x, i32 %y, i32 %a, i32 %b) local_unnamed_addr #0 {
; CHECK-LABEL: @foo(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[B_A:%.*]] = select i1 [[CMP]], i32 [[B:%.*]], i32 [[A:%.*]]
; CHECK-NEXT:    [[RETVAL_0:%.*]] = trunc i32 [[B_A]] to i8
; CHECK-NEXT:    ret i8 [[RETVAL_0]]
;
entry:
  %cmp = icmp slt i32 %x, %y
  %b.a = select i1 %cmp, i32 %b, i32 %a
  %retval.0 = trunc i32 %b.a to i8
  ret i8 %retval.0
}

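; The loop body of @bar repeats the icmp/select/zext/mul/trunc pattern of @foo
; over 16 consecutive bytes. With SSE the SLP vectorizer should form four
; <4 x i8> groups (widening the multiply to <4 x i32>); with AVX512 it should
; form a single <16 x i8> group.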
define void @bar(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture readonly %c, i8* noalias nocapture readonly %d, i8* noalias nocapture %e, i32 %w) local_unnamed_addr #1 {
; SSE-LABEL: @bar(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> undef, i32 [[W:%.*]], i32 0
; SSE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[W]], i32 1
; SSE-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[W]], i32 2
; SSE-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[W]], i32 3
; SSE-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> undef, i32 [[W]], i32 0
; SSE-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[W]], i32 1
; SSE-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[W]], i32 2
; SSE-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[W]], i32 3
; SSE-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> undef, i32 [[W]], i32 0
; SSE-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[W]], i32 1
; SSE-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[W]], i32 2
; SSE-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[W]], i32 3
; SSE-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> undef, i32 [[W]], i32 0
; SSE-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[W]], i32 1
; SSE-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[W]], i32 2
; SSE-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[W]], i32 3
; SSE-NEXT:    br label [[FOR_BODY:%.*]]
; SSE:       for.body:
; SSE-NEXT:    [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[A_ADDR_0355:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[E_ADDR_0354:%.*]] = phi i8* [ [[E:%.*]], [[ENTRY]] ], [ [[ADD_PTR192:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1
; SSE-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1
; SSE-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1
; SSE-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1
; SSE-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1
; SSE-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2
; SSE-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2
; SSE-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2
; SSE-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
; SSE-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
; SSE-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
; SSE-NEXT:    [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <4 x i8>*
; SSE-NEXT:    [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1
; SSE-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
; SSE-NEXT:    [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <4 x i8>*
; SSE-NEXT:    [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
; SSE-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
; SSE-NEXT:    [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <4 x i8>*
; SSE-NEXT:    [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
; SSE-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
; SSE-NEXT:    [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <4 x i8>*
; SSE-NEXT:    [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1
; SSE-NEXT:    [[TMP24:%.*]] = icmp ult <4 x i8> [[TMP17]], [[TMP19]]
; SSE-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP24]], <4 x i8> [[TMP23]], <4 x i8> [[TMP21]]
; SSE-NEXT:    [[TMP26:%.*]] = zext <4 x i8> [[TMP25]] to <4 x i32>
; SSE-NEXT:    [[TMP27:%.*]] = mul <4 x i32> [[TMP26]], [[TMP3]]
; SSE-NEXT:    [[TMP28:%.*]] = trunc <4 x i32> [[TMP27]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
; SSE-NEXT:    [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP28]], <4 x i8>* [[TMP29]], align 1
; SSE-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4
; SSE-NEXT:    [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4
; SSE-NEXT:    [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
; SSE-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4
; SSE-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4
; SSE-NEXT:    [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5
; SSE-NEXT:    [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5
; SSE-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5
; SSE-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5
; SSE-NEXT:    [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5
; SSE-NEXT:    [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6
; SSE-NEXT:    [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6
; SSE-NEXT:    [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6
; SSE-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
; SSE-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
; SSE-NEXT:    [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
; SSE-NEXT:    [[TMP30:%.*]] = bitcast i8* [[ARRAYIDX45]] to <4 x i8>*
; SSE-NEXT:    [[TMP31:%.*]] = load <4 x i8>, <4 x i8>* [[TMP30]], align 1
; SSE-NEXT:    [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
; SSE-NEXT:    [[TMP32:%.*]] = bitcast i8* [[ARRAYIDX47]] to <4 x i8>*
; SSE-NEXT:    [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1
; SSE-NEXT:    [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
; SSE-NEXT:    [[TMP34:%.*]] = bitcast i8* [[ARRAYIDX49]] to <4 x i8>*
; SSE-NEXT:    [[TMP35:%.*]] = load <4 x i8>, <4 x i8>* [[TMP34]], align 1
; SSE-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
; SSE-NEXT:    [[TMP36:%.*]] = bitcast i8* [[ARRAYIDX52]] to <4 x i8>*
; SSE-NEXT:    [[TMP37:%.*]] = load <4 x i8>, <4 x i8>* [[TMP36]], align 1
; SSE-NEXT:    [[TMP38:%.*]] = icmp ult <4 x i8> [[TMP31]], [[TMP33]]
; SSE-NEXT:    [[TMP39:%.*]] = select <4 x i1> [[TMP38]], <4 x i8> [[TMP37]], <4 x i8> [[TMP35]]
; SSE-NEXT:    [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i32>
; SSE-NEXT:    [[TMP41:%.*]] = mul <4 x i32> [[TMP40]], [[TMP7]]
; SSE-NEXT:    [[TMP42:%.*]] = trunc <4 x i32> [[TMP41]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
; SSE-NEXT:    [[TMP43:%.*]] = bitcast i8* [[ARRAYIDX56]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP42]], <4 x i8>* [[TMP43]], align 1
; SSE-NEXT:    [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8
; SSE-NEXT:    [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8
; SSE-NEXT:    [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
; SSE-NEXT:    [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8
; SSE-NEXT:    [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8
; SSE-NEXT:    [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9
; SSE-NEXT:    [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9
; SSE-NEXT:    [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9
; SSE-NEXT:    [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9
; SSE-NEXT:    [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9
; SSE-NEXT:    [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10
; SSE-NEXT:    [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10
; SSE-NEXT:    [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10
; SSE-NEXT:    [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
; SSE-NEXT:    [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
; SSE-NEXT:    [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
; SSE-NEXT:    [[TMP44:%.*]] = bitcast i8* [[ARRAYIDX93]] to <4 x i8>*
; SSE-NEXT:    [[TMP45:%.*]] = load <4 x i8>, <4 x i8>* [[TMP44]], align 1
; SSE-NEXT:    [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
; SSE-NEXT:    [[TMP46:%.*]] = bitcast i8* [[ARRAYIDX95]] to <4 x i8>*
; SSE-NEXT:    [[TMP47:%.*]] = load <4 x i8>, <4 x i8>* [[TMP46]], align 1
; SSE-NEXT:    [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
; SSE-NEXT:    [[TMP48:%.*]] = bitcast i8* [[ARRAYIDX97]] to <4 x i8>*
; SSE-NEXT:    [[TMP49:%.*]] = load <4 x i8>, <4 x i8>* [[TMP48]], align 1
; SSE-NEXT:    [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
; SSE-NEXT:    [[TMP50:%.*]] = bitcast i8* [[ARRAYIDX100]] to <4 x i8>*
; SSE-NEXT:    [[TMP51:%.*]] = load <4 x i8>, <4 x i8>* [[TMP50]], align 1
; SSE-NEXT:    [[TMP52:%.*]] = icmp ult <4 x i8> [[TMP45]], [[TMP47]]
; SSE-NEXT:    [[TMP53:%.*]] = select <4 x i1> [[TMP52]], <4 x i8> [[TMP51]], <4 x i8> [[TMP49]]
; SSE-NEXT:    [[TMP54:%.*]] = zext <4 x i8> [[TMP53]] to <4 x i32>
; SSE-NEXT:    [[TMP55:%.*]] = mul <4 x i32> [[TMP54]], [[TMP11]]
; SSE-NEXT:    [[TMP56:%.*]] = trunc <4 x i32> [[TMP55]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
; SSE-NEXT:    [[TMP57:%.*]] = bitcast i8* [[ARRAYIDX104]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP56]], <4 x i8>* [[TMP57]], align 1
; SSE-NEXT:    [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12
; SSE-NEXT:    [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12
; SSE-NEXT:    [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
; SSE-NEXT:    [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12
; SSE-NEXT:    [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12
; SSE-NEXT:    [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13
; SSE-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13
; SSE-NEXT:    [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13
; SSE-NEXT:    [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13
; SSE-NEXT:    [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13
; SSE-NEXT:    [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14
; SSE-NEXT:    [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14
; SSE-NEXT:    [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14
; SSE-NEXT:    [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; SSE-NEXT:    [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; SSE-NEXT:    [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
; SSE-NEXT:    [[TMP58:%.*]] = bitcast i8* [[ARRAYIDX141]] to <4 x i8>*
; SSE-NEXT:    [[TMP59:%.*]] = load <4 x i8>, <4 x i8>* [[TMP58]], align 1
; SSE-NEXT:    [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
; SSE-NEXT:    [[TMP60:%.*]] = bitcast i8* [[ARRAYIDX143]] to <4 x i8>*
; SSE-NEXT:    [[TMP61:%.*]] = load <4 x i8>, <4 x i8>* [[TMP60]], align 1
; SSE-NEXT:    [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
; SSE-NEXT:    [[TMP62:%.*]] = bitcast i8* [[ARRAYIDX145]] to <4 x i8>*
; SSE-NEXT:    [[TMP63:%.*]] = load <4 x i8>, <4 x i8>* [[TMP62]], align 1
; SSE-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
; SSE-NEXT:    [[TMP64:%.*]] = bitcast i8* [[ARRAYIDX148]] to <4 x i8>*
; SSE-NEXT:    [[TMP65:%.*]] = load <4 x i8>, <4 x i8>* [[TMP64]], align 1
; SSE-NEXT:    [[TMP66:%.*]] = icmp ult <4 x i8> [[TMP59]], [[TMP61]]
; SSE-NEXT:    [[TMP67:%.*]] = select <4 x i1> [[TMP66]], <4 x i8> [[TMP65]], <4 x i8> [[TMP63]]
; SSE-NEXT:    [[TMP68:%.*]] = zext <4 x i8> [[TMP67]] to <4 x i32>
; SSE-NEXT:    [[TMP69:%.*]] = mul <4 x i32> [[TMP68]], [[TMP15]]
; SSE-NEXT:    [[TMP70:%.*]] = trunc <4 x i32> [[TMP69]] to <4 x i8>
; SSE-NEXT:    [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
; SSE-NEXT:    [[TMP71:%.*]] = bitcast i8* [[ARRAYIDX152]] to <4 x i8>*
; SSE-NEXT:    store <4 x i8> [[TMP70]], <4 x i8>* [[TMP71]], align 1
; SSE-NEXT:    [[INC]] = add nuw nsw i32 [[I_0356]], 1
; SSE-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
; SSE-NEXT:    [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
; SSE-NEXT:    [[ADD_PTR190]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 16
; SSE-NEXT:    [[ADD_PTR191]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 16
; SSE-NEXT:    [[ADD_PTR192]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 16
; SSE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 8
; SSE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; SSE:       for.end:
; SSE-NEXT:    ret void
;
; AVX512-LABEL: @bar(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[TMP0:%.*]] = insertelement <16 x i32> undef, i32 [[W:%.*]], i32 0
; AVX512-NEXT:    [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 [[W]], i32 1
; AVX512-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[W]], i32 2
; AVX512-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[W]], i32 3
; AVX512-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[W]], i32 4
; AVX512-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[W]], i32 5
; AVX512-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[W]], i32 6
; AVX512-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[W]], i32 7
; AVX512-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[W]], i32 8
; AVX512-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[W]], i32 9
; AVX512-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[W]], i32 10
; AVX512-NEXT:    [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[W]], i32 11
; AVX512-NEXT:    [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 [[W]], i32 12
; AVX512-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 [[W]], i32 13
; AVX512-NEXT:    [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 [[W]], i32 14
; AVX512-NEXT:    [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 [[W]], i32 15
; AVX512-NEXT:    br label [[FOR_BODY:%.*]]
; AVX512:       for.body:
; AVX512-NEXT:    [[I_0356:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[A_ADDR_0355:%.*]] = phi i8* [ [[A:%.*]], [[ENTRY]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[E_ADDR_0354:%.*]] = phi i8* [ [[E:%.*]], [[ENTRY]] ], [ [[ADD_PTR192:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[D_ADDR_0353:%.*]] = phi i8* [ [[D:%.*]], [[ENTRY]] ], [ [[ADD_PTR191:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[C_ADDR_0352:%.*]] = phi i8* [ [[C:%.*]], [[ENTRY]] ], [ [[ADD_PTR190:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[B_ADDR_0351:%.*]] = phi i8* [ [[B:%.*]], [[ENTRY]] ], [ [[ADD_PTR189:%.*]], [[FOR_BODY]] ]
; AVX512-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 1
; AVX512-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 1
; AVX512-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 1
; AVX512-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 1
; AVX512-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 1
; AVX512-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 2
; AVX512-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 2
; AVX512-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 2
; AVX512-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 2
; AVX512-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 2
; AVX512-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 3
; AVX512-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 3
; AVX512-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 3
; AVX512-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 3
; AVX512-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 3
; AVX512-NEXT:    [[ARRAYIDX45:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 4
; AVX512-NEXT:    [[ARRAYIDX47:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 4
; AVX512-NEXT:    [[ARRAYIDX49:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 4
; AVX512-NEXT:    [[ARRAYIDX52:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 4
; AVX512-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 4
; AVX512-NEXT:    [[ARRAYIDX57:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 5
; AVX512-NEXT:    [[ARRAYIDX59:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 5
; AVX512-NEXT:    [[ARRAYIDX61:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 5
; AVX512-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 5
; AVX512-NEXT:    [[ARRAYIDX68:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 5
; AVX512-NEXT:    [[ARRAYIDX69:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 6
; AVX512-NEXT:    [[ARRAYIDX71:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 6
; AVX512-NEXT:    [[ARRAYIDX73:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 6
; AVX512-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 6
; AVX512-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 6
; AVX512-NEXT:    [[ARRAYIDX81:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 7
; AVX512-NEXT:    [[ARRAYIDX83:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 7
; AVX512-NEXT:    [[ARRAYIDX85:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 7
; AVX512-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 7
; AVX512-NEXT:    [[ARRAYIDX92:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 7
; AVX512-NEXT:    [[ARRAYIDX93:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 8
; AVX512-NEXT:    [[ARRAYIDX95:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 8
; AVX512-NEXT:    [[ARRAYIDX97:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 8
; AVX512-NEXT:    [[ARRAYIDX100:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 8
; AVX512-NEXT:    [[ARRAYIDX104:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 8
; AVX512-NEXT:    [[ARRAYIDX105:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 9
; AVX512-NEXT:    [[ARRAYIDX107:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 9
; AVX512-NEXT:    [[ARRAYIDX109:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 9
; AVX512-NEXT:    [[ARRAYIDX112:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 9
; AVX512-NEXT:    [[ARRAYIDX116:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 9
; AVX512-NEXT:    [[ARRAYIDX117:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 10
; AVX512-NEXT:    [[ARRAYIDX119:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 10
; AVX512-NEXT:    [[ARRAYIDX121:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 10
; AVX512-NEXT:    [[ARRAYIDX124:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 10
; AVX512-NEXT:    [[ARRAYIDX128:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 10
; AVX512-NEXT:    [[ARRAYIDX129:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 11
; AVX512-NEXT:    [[ARRAYIDX131:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 11
; AVX512-NEXT:    [[ARRAYIDX133:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 11
; AVX512-NEXT:    [[ARRAYIDX136:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 11
; AVX512-NEXT:    [[ARRAYIDX140:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 11
; AVX512-NEXT:    [[ARRAYIDX141:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 12
; AVX512-NEXT:    [[ARRAYIDX143:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 12
; AVX512-NEXT:    [[ARRAYIDX145:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 12
; AVX512-NEXT:    [[ARRAYIDX148:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 12
; AVX512-NEXT:    [[ARRAYIDX152:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 12
; AVX512-NEXT:    [[ARRAYIDX153:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 13
; AVX512-NEXT:    [[ARRAYIDX155:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 13
; AVX512-NEXT:    [[ARRAYIDX157:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 13
; AVX512-NEXT:    [[ARRAYIDX160:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 13
; AVX512-NEXT:    [[ARRAYIDX164:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 13
; AVX512-NEXT:    [[ARRAYIDX165:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 14
; AVX512-NEXT:    [[ARRAYIDX167:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 14
; AVX512-NEXT:    [[ARRAYIDX169:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 14
; AVX512-NEXT:    [[ARRAYIDX172:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 14
; AVX512-NEXT:    [[ARRAYIDX176:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 14
; AVX512-NEXT:    [[ARRAYIDX177:%.*]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 15
; AVX512-NEXT:    [[TMP16:%.*]] = bitcast i8* [[C_ADDR_0352]] to <16 x i8>*
; AVX512-NEXT:    [[TMP17:%.*]] = load <16 x i8>, <16 x i8>* [[TMP16]], align 1
; AVX512-NEXT:    [[ARRAYIDX179:%.*]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 15
; AVX512-NEXT:    [[TMP18:%.*]] = bitcast i8* [[D_ADDR_0353]] to <16 x i8>*
; AVX512-NEXT:    [[TMP19:%.*]] = load <16 x i8>, <16 x i8>* [[TMP18]], align 1
; AVX512-NEXT:    [[ARRAYIDX181:%.*]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 15
; AVX512-NEXT:    [[TMP20:%.*]] = bitcast i8* [[A_ADDR_0355]] to <16 x i8>*
; AVX512-NEXT:    [[TMP21:%.*]] = load <16 x i8>, <16 x i8>* [[TMP20]], align 1
; AVX512-NEXT:    [[ARRAYIDX184:%.*]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 15
; AVX512-NEXT:    [[TMP22:%.*]] = bitcast i8* [[B_ADDR_0351]] to <16 x i8>*
; AVX512-NEXT:    [[TMP23:%.*]] = load <16 x i8>, <16 x i8>* [[TMP22]], align 1
; AVX512-NEXT:    [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]]
; AVX512-NEXT:    [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]]
; AVX512-NEXT:    [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32>
; AVX512-NEXT:    [[TMP27:%.*]] = mul <16 x i32> [[TMP26]], [[TMP15]]
; AVX512-NEXT:    [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8>
; AVX512-NEXT:    [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15
; AVX512-NEXT:    [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>*
; AVX512-NEXT:    store <16 x i8> [[TMP28]], <16 x i8>* [[TMP29]], align 1
; AVX512-NEXT:    [[INC]] = add nuw nsw i32 [[I_0356]], 1
; AVX512-NEXT:    [[ADD_PTR]] = getelementptr inbounds i8, i8* [[A_ADDR_0355]], i64 16
; AVX512-NEXT:    [[ADD_PTR189]] = getelementptr inbounds i8, i8* [[B_ADDR_0351]], i64 16
; AVX512-NEXT:    [[ADD_PTR190]] = getelementptr inbounds i8, i8* [[C_ADDR_0352]], i64 16
; AVX512-NEXT:    [[ADD_PTR191]] = getelementptr inbounds i8, i8* [[D_ADDR_0353]], i64 16
; AVX512-NEXT:    [[ADD_PTR192]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 16
; AVX512-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 8
; AVX512-NEXT:    br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; AVX512:       for.end:
; AVX512-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.0356 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %a.addr.0355 = phi i8* [ %a, %entry ], [ %add.ptr, %for.body ]
  %e.addr.0354 = phi i8* [ %e, %entry ], [ %add.ptr192, %for.body ]
  %d.addr.0353 = phi i8* [ %d, %entry ], [ %add.ptr191, %for.body ]
  %c.addr.0352 = phi i8* [ %c, %entry ], [ %add.ptr190, %for.body ]
  %b.addr.0351 = phi i8* [ %b, %entry ], [ %add.ptr189, %for.body ]
  %0 = load i8, i8* %c.addr.0352, align 1
  %1 = load i8, i8* %d.addr.0353, align 1
  %2 = load i8, i8* %a.addr.0355, align 1
  %3 = load i8, i8* %b.addr.0351, align 1
  %cmp.i = icmp ult i8 %0, %1
  %b.a.i.v.v = select i1 %cmp.i, i8 %3, i8 %2
  %b.a.i.v = zext i8 %b.a.i.v.v to i32
  %b.a.i = mul i32 %b.a.i.v, %w
  %retval.0.i = trunc i32 %b.a.i to i8
  store i8 %retval.0.i, i8* %e.addr.0354, align 1
  %arrayidx9 = getelementptr inbounds i8, i8* %c.addr.0352, i64 1
  %4 = load i8, i8* %arrayidx9, align 1
  %arrayidx11 = getelementptr inbounds i8, i8* %d.addr.0353, i64 1
  %5 = load i8, i8* %arrayidx11, align 1
  %arrayidx13 = getelementptr inbounds i8, i8* %a.addr.0355, i64 1
  %6 = load i8, i8* %arrayidx13, align 1
  %arrayidx16 = getelementptr inbounds i8, i8* %b.addr.0351, i64 1
  %7 = load i8, i8* %arrayidx16, align 1
  %cmp.i348 = icmp ult i8 %4, %5
  %b.a.i349.v.v = select i1 %cmp.i348, i8 %7, i8 %6
  %b.a.i349.v = zext i8 %b.a.i349.v.v to i32
  %b.a.i349 = mul i32 %b.a.i349.v, %w
  %retval.0.i350 = trunc i32 %b.a.i349 to i8
  %arrayidx20 = getelementptr inbounds i8, i8* %e.addr.0354, i64 1
  store i8 %retval.0.i350, i8* %arrayidx20, align 1
  %arrayidx21 = getelementptr inbounds i8, i8* %c.addr.0352, i64 2
  %8 = load i8, i8* %arrayidx21, align 1
  %arrayidx23 = getelementptr inbounds i8, i8* %d.addr.0353, i64 2
  %9 = load i8, i8* %arrayidx23, align 1
  %arrayidx25 = getelementptr inbounds i8, i8* %a.addr.0355, i64 2
  %10 = load i8, i8* %arrayidx25, align 1
  %arrayidx28 = getelementptr inbounds i8, i8* %b.addr.0351, i64 2
  %11 = load i8, i8* %arrayidx28, align 1
  %cmp.i345 = icmp ult i8 %8, %9
  %b.a.i346.v.v = select i1 %cmp.i345, i8 %11, i8 %10
  %b.a.i346.v = zext i8 %b.a.i346.v.v to i32
  %b.a.i346 = mul i32 %b.a.i346.v, %w
  %retval.0.i347 = trunc i32 %b.a.i346 to i8
  %arrayidx32 = getelementptr inbounds i8, i8* %e.addr.0354, i64 2
  store i8 %retval.0.i347, i8* %arrayidx32, align 1
  %arrayidx33 = getelementptr inbounds i8, i8* %c.addr.0352, i64 3
  %12 = load i8, i8* %arrayidx33, align 1
  %arrayidx35 = getelementptr inbounds i8, i8* %d.addr.0353, i64 3
  %13 = load i8, i8* %arrayidx35, align 1
  %arrayidx37 = getelementptr inbounds i8, i8* %a.addr.0355, i64 3
  %14 = load i8, i8* %arrayidx37, align 1
  %arrayidx40 = getelementptr inbounds i8, i8* %b.addr.0351, i64 3
  %15 = load i8, i8* %arrayidx40, align 1
  %cmp.i342 = icmp ult i8 %12, %13
  %b.a.i343.v.v = select i1 %cmp.i342, i8 %15, i8 %14
  %b.a.i343.v = zext i8 %b.a.i343.v.v to i32
  %b.a.i343 = mul i32 %b.a.i343.v, %w
  %retval.0.i344 = trunc i32 %b.a.i343 to i8
  %arrayidx44 = getelementptr inbounds i8, i8* %e.addr.0354, i64 3
  store i8 %retval.0.i344, i8* %arrayidx44, align 1
  %arrayidx45 = getelementptr inbounds i8, i8* %c.addr.0352, i64 4
  %16 = load i8, i8* %arrayidx45, align 1
  %arrayidx47 = getelementptr inbounds i8, i8* %d.addr.0353, i64 4
  %17 = load i8, i8* %arrayidx47, align 1
  %arrayidx49 = getelementptr inbounds i8, i8* %a.addr.0355, i64 4
  %18 = load i8, i8* %arrayidx49, align 1
  %arrayidx52 = getelementptr inbounds i8, i8* %b.addr.0351, i64 4
  %19 = load i8, i8* %arrayidx52, align 1
  %cmp.i339 = icmp ult i8 %16, %17
  %b.a.i340.v.v = select i1 %cmp.i339, i8 %19, i8 %18
  %b.a.i340.v = zext i8 %b.a.i340.v.v to i32
  %b.a.i340 = mul i32 %b.a.i340.v, %w
  %retval.0.i341 = trunc i32 %b.a.i340 to i8
  %arrayidx56 = getelementptr inbounds i8, i8* %e.addr.0354, i64 4
  store i8 %retval.0.i341, i8* %arrayidx56, align 1
  %arrayidx57 = getelementptr inbounds i8, i8* %c.addr.0352, i64 5
  %20 = load i8, i8* %arrayidx57, align 1
  %arrayidx59 = getelementptr inbounds i8, i8* %d.addr.0353, i64 5
  %21 = load i8, i8* %arrayidx59, align 1
  %arrayidx61 = getelementptr inbounds i8, i8* %a.addr.0355, i64 5
  %22 = load i8, i8* %arrayidx61, align 1
  %arrayidx64 = getelementptr inbounds i8, i8* %b.addr.0351, i64 5
  %23 = load i8, i8* %arrayidx64, align 1
  %cmp.i336 = icmp ult i8 %20, %21
  %b.a.i337.v.v = select i1 %cmp.i336, i8 %23, i8 %22
  %b.a.i337.v = zext i8 %b.a.i337.v.v to i32
  %b.a.i337 = mul i32 %b.a.i337.v, %w
  %retval.0.i338 = trunc i32 %b.a.i337 to i8
  %arrayidx68 = getelementptr inbounds i8, i8* %e.addr.0354, i64 5
  store i8 %retval.0.i338, i8* %arrayidx68, align 1
  %arrayidx69 = getelementptr inbounds i8, i8* %c.addr.0352, i64 6
  %24 = load i8, i8* %arrayidx69, align 1
  %arrayidx71 = getelementptr inbounds i8, i8* %d.addr.0353, i64 6
  %25 = load i8, i8* %arrayidx71, align 1
  %arrayidx73 = getelementptr inbounds i8, i8* %a.addr.0355, i64 6
  %26 = load i8, i8* %arrayidx73, align 1
  %arrayidx76 = getelementptr inbounds i8, i8* %b.addr.0351, i64 6
  %27 = load i8, i8* %arrayidx76, align 1
  %cmp.i333 = icmp ult i8 %24, %25
  %b.a.i334.v.v = select i1 %cmp.i333, i8 %27, i8 %26
  %b.a.i334.v = zext i8 %b.a.i334.v.v to i32
  %b.a.i334 = mul i32 %b.a.i334.v, %w
  %retval.0.i335 = trunc i32 %b.a.i334 to i8
  %arrayidx80 = getelementptr inbounds i8, i8* %e.addr.0354, i64 6
  store i8 %retval.0.i335, i8* %arrayidx80, align 1
  %arrayidx81 = getelementptr inbounds i8, i8* %c.addr.0352, i64 7
  %28 = load i8, i8* %arrayidx81, align 1
  %arrayidx83 = getelementptr inbounds i8, i8* %d.addr.0353, i64 7
  %29 = load i8, i8* %arrayidx83, align 1
  %arrayidx85 = getelementptr inbounds i8, i8* %a.addr.0355, i64 7
  %30 = load i8, i8* %arrayidx85, align 1
  %arrayidx88 = getelementptr inbounds i8, i8* %b.addr.0351, i64 7
  %31 = load i8, i8* %arrayidx88, align 1
  %cmp.i330 = icmp ult i8 %28, %29
  %b.a.i331.v.v = select i1 %cmp.i330, i8 %31, i8 %30
  %b.a.i331.v = zext i8 %b.a.i331.v.v to i32
  %b.a.i331 = mul i32 %b.a.i331.v, %w
  %retval.0.i332 = trunc i32 %b.a.i331 to i8
  %arrayidx92 = getelementptr inbounds i8, i8* %e.addr.0354, i64 7
  store i8 %retval.0.i332, i8* %arrayidx92, align 1
  %arrayidx93 = getelementptr inbounds i8, i8* %c.addr.0352, i64 8
  %32 = load i8, i8* %arrayidx93, align 1
  %arrayidx95 = getelementptr inbounds i8, i8* %d.addr.0353, i64 8
  %33 = load i8, i8* %arrayidx95, align 1
  %arrayidx97 = getelementptr inbounds i8, i8* %a.addr.0355, i64 8
  %34 = load i8, i8* %arrayidx97, align 1
  %arrayidx100 = getelementptr inbounds i8, i8* %b.addr.0351, i64 8
  %35 = load i8, i8* %arrayidx100, align 1
  %cmp.i327 = icmp ult i8 %32, %33
  %b.a.i328.v.v = select i1 %cmp.i327, i8 %35, i8 %34
  %b.a.i328.v = zext i8 %b.a.i328.v.v to i32
  %b.a.i328 = mul i32 %b.a.i328.v, %w
  %retval.0.i329 = trunc i32 %b.a.i328 to i8
  %arrayidx104 = getelementptr inbounds i8, i8* %e.addr.0354, i64 8
  store i8 %retval.0.i329, i8* %arrayidx104, align 1
  %arrayidx105 = getelementptr inbounds i8, i8* %c.addr.0352, i64 9
  %36 = load i8, i8* %arrayidx105, align 1
  %arrayidx107 = getelementptr inbounds i8, i8* %d.addr.0353, i64 9
  %37 = load i8, i8* %arrayidx107, align 1
  %arrayidx109 = getelementptr inbounds i8, i8* %a.addr.0355, i64 9
  %38 = load i8, i8* %arrayidx109, align 1
  %arrayidx112 = getelementptr inbounds i8, i8* %b.addr.0351, i64 9
  %39 = load i8, i8* %arrayidx112, align 1
  %cmp.i324 = icmp ult i8 %36, %37
  %b.a.i325.v.v = select i1 %cmp.i324, i8 %39, i8 %38
  %b.a.i325.v = zext i8 %b.a.i325.v.v to i32
  %b.a.i325 = mul i32 %b.a.i325.v, %w
  %retval.0.i326 = trunc i32 %b.a.i325 to i8
  %arrayidx116 = getelementptr inbounds i8, i8* %e.addr.0354, i64 9
  store i8 %retval.0.i326, i8* %arrayidx116, align 1
  %arrayidx117 = getelementptr inbounds i8, i8* %c.addr.0352, i64 10
  %40 = load i8, i8* %arrayidx117, align 1
  %arrayidx119 = getelementptr inbounds i8, i8* %d.addr.0353, i64 10
  %41 = load i8, i8* %arrayidx119, align 1
  %arrayidx121 = getelementptr inbounds i8, i8* %a.addr.0355, i64 10
  %42 = load i8, i8* %arrayidx121, align 1
  %arrayidx124 = getelementptr inbounds i8, i8* %b.addr.0351, i64 10
  %43 = load i8, i8* %arrayidx124, align 1
  %cmp.i321 = icmp ult i8 %40, %41
  %b.a.i322.v.v = select i1 %cmp.i321, i8 %43, i8 %42
  %b.a.i322.v = zext i8 %b.a.i322.v.v to i32
  %b.a.i322 = mul i32 %b.a.i322.v, %w
  %retval.0.i323 = trunc i32 %b.a.i322 to i8
  %arrayidx128 = getelementptr inbounds i8, i8* %e.addr.0354, i64 10
  store i8 %retval.0.i323, i8* %arrayidx128, align 1
  %arrayidx129 = getelementptr inbounds i8, i8* %c.addr.0352, i64 11
  %44 = load i8, i8* %arrayidx129, align 1
  %arrayidx131 = getelementptr inbounds i8, i8* %d.addr.0353, i64 11
  %45 = load i8, i8* %arrayidx131, align 1
  %arrayidx133 = getelementptr inbounds i8, i8* %a.addr.0355, i64 11
  %46 = load i8, i8* %arrayidx133, align 1
  %arrayidx136 = getelementptr inbounds i8, i8* %b.addr.0351, i64 11
  %47 = load i8, i8* %arrayidx136, align 1
  %cmp.i318 = icmp ult i8 %44, %45
  %b.a.i319.v.v = select i1 %cmp.i318, i8 %47, i8 %46
  %b.a.i319.v = zext i8 %b.a.i319.v.v to i32
  %b.a.i319 = mul i32 %b.a.i319.v, %w
  %retval.0.i320 = trunc i32 %b.a.i319 to i8
  %arrayidx140 = getelementptr inbounds i8, i8* %e.addr.0354, i64 11
  store i8 %retval.0.i320, i8* %arrayidx140, align 1
  %arrayidx141 = getelementptr inbounds i8, i8* %c.addr.0352, i64 12
  %48 = load i8, i8* %arrayidx141, align 1
  %arrayidx143 = getelementptr inbounds i8, i8* %d.addr.0353, i64 12
  %49 = load i8, i8* %arrayidx143, align 1
  %arrayidx145 = getelementptr inbounds i8, i8* %a.addr.0355, i64 12
  %50 = load i8, i8* %arrayidx145, align 1
  %arrayidx148 = getelementptr inbounds i8, i8* %b.addr.0351, i64 12
  %51 = load i8, i8* %arrayidx148, align 1
  %cmp.i315 = icmp ult i8 %48, %49
  %b.a.i316.v.v = select i1 %cmp.i315, i8 %51, i8 %50
  %b.a.i316.v = zext i8 %b.a.i316.v.v to i32
  %b.a.i316 = mul i32 %b.a.i316.v, %w
  %retval.0.i317 = trunc i32 %b.a.i316 to i8
  %arrayidx152 = getelementptr inbounds i8, i8* %e.addr.0354, i64 12
  store i8 %retval.0.i317, i8* %arrayidx152, align 1
  %arrayidx153 = getelementptr inbounds i8, i8* %c.addr.0352, i64 13
  %52 = load i8, i8* %arrayidx153, align 1
  %arrayidx155 = getelementptr inbounds i8, i8* %d.addr.0353, i64 13
  %53 = load i8, i8* %arrayidx155, align 1
  %arrayidx157 = getelementptr inbounds i8, i8* %a.addr.0355, i64 13
  %54 = load i8, i8* %arrayidx157, align 1
  %arrayidx160 = getelementptr inbounds i8, i8* %b.addr.0351, i64 13
  %55 = load i8, i8* %arrayidx160, align 1
  %cmp.i312 = icmp ult i8 %52, %53
  %b.a.i313.v.v = select i1 %cmp.i312, i8 %55, i8 %54
  %b.a.i313.v = zext i8 %b.a.i313.v.v to i32
  %b.a.i313 = mul i32 %b.a.i313.v, %w
  %retval.0.i314 = trunc i32 %b.a.i313 to i8
  %arrayidx164 = getelementptr inbounds i8, i8* %e.addr.0354, i64 13
  store i8 %retval.0.i314, i8* %arrayidx164, align 1
  %arrayidx165 = getelementptr inbounds i8, i8* %c.addr.0352, i64 14
  %56 = load i8, i8* %arrayidx165, align 1
  %arrayidx167 = getelementptr inbounds i8, i8* %d.addr.0353, i64 14
  %57 = load i8, i8* %arrayidx167, align 1
  %arrayidx169 = getelementptr inbounds i8, i8* %a.addr.0355, i64 14
  %58 = load i8, i8* %arrayidx169, align 1
  %arrayidx172 = getelementptr inbounds i8, i8* %b.addr.0351, i64 14
  %59 = load i8, i8* %arrayidx172, align 1
  %cmp.i309 = icmp ult i8 %56, %57
  %b.a.i310.v.v = select i1 %cmp.i309, i8 %59, i8 %58
  %b.a.i310.v = zext i8 %b.a.i310.v.v to i32
  %b.a.i310 = mul i32 %b.a.i310.v, %w
  %retval.0.i311 = trunc i32 %b.a.i310 to i8
  %arrayidx176 = getelementptr inbounds i8, i8* %e.addr.0354, i64 14
  store i8 %retval.0.i311, i8* %arrayidx176, align 1
  %arrayidx177 = getelementptr inbounds i8, i8* %c.addr.0352, i64 15
  %60 = load i8, i8* %arrayidx177, align 1
  %arrayidx179 = getelementptr inbounds i8, i8* %d.addr.0353, i64 15
  %61 = load i8, i8* %arrayidx179, align 1
  %arrayidx181 = getelementptr inbounds i8, i8* %a.addr.0355, i64 15
  %62 = load i8, i8* %arrayidx181, align 1
  %arrayidx184 = getelementptr inbounds i8, i8* %b.addr.0351, i64 15
  %63 = load i8, i8* %arrayidx184, align 1
  %cmp.i306 = icmp ult i8 %60, %61
  %b.a.i307.v.v = select i1 %cmp.i306, i8 %63, i8 %62
  %b.a.i307.v = zext i8 %b.a.i307.v.v to i32
  %b.a.i307 = mul i32 %b.a.i307.v, %w
  %retval.0.i308 = trunc i32 %b.a.i307 to i8
  %arrayidx188 = getelementptr inbounds i8, i8* %e.addr.0354, i64 15
  store i8 %retval.0.i308, i8* %arrayidx188, align 1
  %inc = add nuw nsw i32 %i.0356, 1
  %add.ptr = getelementptr inbounds i8, i8* %a.addr.0355, i64 16
  %add.ptr189 = getelementptr inbounds i8, i8* %b.addr.0351, i64 16
  %add.ptr190 = getelementptr inbounds i8, i8* %c.addr.0352, i64 16
  %add.ptr191 = getelementptr inbounds i8, i8* %d.addr.0353, i64 16
  %add.ptr192 = getelementptr inbounds i8, i8* %e.addr.0354, i64 16
  %exitcond = icmp eq i32 %inc, 8
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

@ib = local_unnamed_addr global [64 x i32] [i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0, i32 0, i32 1, i32 0, i32 1, i32 0], align 16
@ia = common local_unnamed_addr global [64 x i32] zeroinitializer, align 16

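; @foo1 performs 64 fully unrolled load/xor/store triples over @ib and @ia.
; SSE should reduce them to sixteen <4 x i32> xors and AVX512 to four
; <16 x i32> xors, while the scalar verification loop stays scalar.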
define i32 @foo1() local_unnamed_addr #0 {
; SSE-LABEL: @foo1(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16
; SSE-NEXT:    [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], <i32 -1, i32 -1, i32 -1, i32 -1>
; SSE-NEXT:    store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16
; SSE-NEXT:    br label [[FOR_BODY5:%.*]]
; SSE:       for.cond3:
; SSE-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
; SSE-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
; SSE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
; SSE:       for.body5:
; SSE-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
; SSE-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
; SSE-NEXT:    [[TMP32:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; SSE-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
; SSE-NEXT:    [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
; SSE-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP33]], -1
; SSE-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP32]], [[NEG10]]
; SSE-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
; SSE:       if.then:
; SSE-NEXT:    tail call void @abort()
; SSE-NEXT:    unreachable
; SSE:       for.end14:
; SSE-NEXT:    ret i32 0
;
; AVX512-LABEL: @foo1(
; AVX512-NEXT:  entry:
; AVX512-NEXT:    [[TMP0:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([64 x i32]* @ib to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP1:%.*]] = xor <16 x i32> [[TMP0]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP1]], <16 x i32>* bitcast ([64 x i32]* @ia to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP3:%.*]] = xor <16 x i32> [[TMP2]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP3]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP5:%.*]] = xor <16 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP5]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP6:%.*]] = load <16 x i32>, <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <16 x i32>*), align 16
; AVX512-NEXT:    [[TMP7:%.*]] = xor <16 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
; AVX512-NEXT:    store <16 x i32> [[TMP7]], <16 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <16 x i32>*), align 16
; AVX512-NEXT:    br label [[FOR_BODY5:%.*]]
; AVX512:       for.cond3:
; AVX512-NEXT:    [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV:%.*]], 1
; AVX512-NEXT:    [[CMP4:%.*]] = icmp ult i64 [[INDVARS_IV]], 63
; AVX512-NEXT:    br i1 [[CMP4]], label [[FOR_BODY5]], label [[FOR_END14:%.*]]
; AVX512:       for.body5:
; AVX512-NEXT:    [[INDVARS_IV]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT]], [[FOR_COND3:%.*]] ]
; AVX512-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
; AVX512-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 [[INDVARS_IV]]
; AVX512-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4
; AVX512-NEXT:    [[NEG10:%.*]] = xor i32 [[TMP9]], -1
; AVX512-NEXT:    [[CMP11:%.*]] = icmp eq i32 [[TMP8]], [[NEG10]]
; AVX512-NEXT:    br i1 [[CMP11]], label [[FOR_COND3]], label [[IF_THEN:%.*]]
; AVX512:       if.then:
; AVX512-NEXT:    tail call void @abort()
; AVX512-NEXT:    unreachable
; AVX512:       for.end14:
; AVX512-NEXT:    ret i32 0
;
690entry:
691  %0 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 0), align 16
692  %neg = xor i32 %0, -1
693  store i32 %neg, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 0), align 16
694  %1 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 1), align 4
695  %neg.1 = xor i32 %1, -1
696  store i32 %neg.1, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 1), align 4
697  %2 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 2), align 8
698  %neg.2 = xor i32 %2, -1
699  store i32 %neg.2, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 2), align 8
700  %3 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 3), align 4
  %neg.3 = xor i32 %3, -1
  store i32 %neg.3, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 3), align 4
  %4 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4), align 16
  %neg.4 = xor i32 %4, -1
  store i32 %neg.4, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4), align 16
  %5 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 5), align 4
  %neg.5 = xor i32 %5, -1
  store i32 %neg.5, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 5), align 4
  %6 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 6), align 8
  %neg.6 = xor i32 %6, -1
  store i32 %neg.6, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 6), align 8
  %7 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 7), align 4
  %neg.7 = xor i32 %7, -1
  store i32 %neg.7, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 7), align 4
  %8 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8), align 16
  %neg.8 = xor i32 %8, -1
  store i32 %neg.8, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8), align 16
  %9 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 9), align 4
  %neg.9 = xor i32 %9, -1
  store i32 %neg.9, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 9), align 4
  %10 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 10), align 8
  %neg.10 = xor i32 %10, -1
  store i32 %neg.10, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 10), align 8
  %11 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 11), align 4
  %neg.11 = xor i32 %11, -1
  store i32 %neg.11, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 11), align 4
  %12 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12), align 16
  %neg.12 = xor i32 %12, -1
  store i32 %neg.12, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12), align 16
  %13 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 13), align 4
  %neg.13 = xor i32 %13, -1
  store i32 %neg.13, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 13), align 4
  %14 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 14), align 8
  %neg.14 = xor i32 %14, -1
  store i32 %neg.14, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 14), align 8
  %15 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 15), align 4
  %neg.15 = xor i32 %15, -1
  store i32 %neg.15, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 15), align 4
  %16 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16), align 16
  %neg.16 = xor i32 %16, -1
  store i32 %neg.16, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16), align 16
  %17 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 17), align 4
  %neg.17 = xor i32 %17, -1
  store i32 %neg.17, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 17), align 4
  %18 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 18), align 8
  %neg.18 = xor i32 %18, -1
  store i32 %neg.18, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 18), align 8
  %19 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 19), align 4
  %neg.19 = xor i32 %19, -1
  store i32 %neg.19, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 19), align 4
  %20 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20), align 16
  %neg.20 = xor i32 %20, -1
  store i32 %neg.20, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20), align 16
  %21 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 21), align 4
  %neg.21 = xor i32 %21, -1
  store i32 %neg.21, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 21), align 4
  %22 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 22), align 8
  %neg.22 = xor i32 %22, -1
  store i32 %neg.22, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 22), align 8
  %23 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 23), align 4
  %neg.23 = xor i32 %23, -1
  store i32 %neg.23, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 23), align 4
  %24 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24), align 16
  %neg.24 = xor i32 %24, -1
  store i32 %neg.24, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24), align 16
  %25 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 25), align 4
  %neg.25 = xor i32 %25, -1
  store i32 %neg.25, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 25), align 4
  %26 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 26), align 8
  %neg.26 = xor i32 %26, -1
  store i32 %neg.26, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 26), align 8
  %27 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 27), align 4
  %neg.27 = xor i32 %27, -1
  store i32 %neg.27, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 27), align 4
  %28 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28), align 16
  %neg.28 = xor i32 %28, -1
  store i32 %neg.28, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28), align 16
  %29 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 29), align 4
  %neg.29 = xor i32 %29, -1
  store i32 %neg.29, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 29), align 4
  %30 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 30), align 8
  %neg.30 = xor i32 %30, -1
  store i32 %neg.30, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 30), align 8
  %31 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 31), align 4
  %neg.31 = xor i32 %31, -1
  store i32 %neg.31, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 31), align 4
  %32 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32), align 16
  %neg.32 = xor i32 %32, -1
  store i32 %neg.32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32), align 16
  %33 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 33), align 4
  %neg.33 = xor i32 %33, -1
  store i32 %neg.33, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 33), align 4
  %34 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 34), align 8
  %neg.34 = xor i32 %34, -1
  store i32 %neg.34, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 34), align 8
  %35 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 35), align 4
  %neg.35 = xor i32 %35, -1
  store i32 %neg.35, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 35), align 4
  %36 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36), align 16
  %neg.36 = xor i32 %36, -1
  store i32 %neg.36, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36), align 16
  %37 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 37), align 4
  %neg.37 = xor i32 %37, -1
  store i32 %neg.37, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 37), align 4
  %38 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 38), align 8
  %neg.38 = xor i32 %38, -1
  store i32 %neg.38, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 38), align 8
  %39 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 39), align 4
  %neg.39 = xor i32 %39, -1
  store i32 %neg.39, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 39), align 4
  %40 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40), align 16
  %neg.40 = xor i32 %40, -1
  store i32 %neg.40, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40), align 16
  %41 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 41), align 4
  %neg.41 = xor i32 %41, -1
  store i32 %neg.41, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 41), align 4
  %42 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 42), align 8
  %neg.42 = xor i32 %42, -1
  store i32 %neg.42, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 42), align 8
  %43 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 43), align 4
  %neg.43 = xor i32 %43, -1
  store i32 %neg.43, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 43), align 4
  %44 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44), align 16
  %neg.44 = xor i32 %44, -1
  store i32 %neg.44, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44), align 16
  %45 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 45), align 4
  %neg.45 = xor i32 %45, -1
  store i32 %neg.45, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 45), align 4
  %46 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 46), align 8
  %neg.46 = xor i32 %46, -1
  store i32 %neg.46, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 46), align 8
  %47 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 47), align 4
  %neg.47 = xor i32 %47, -1
  store i32 %neg.47, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 47), align 4
  %48 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48), align 16
  %neg.48 = xor i32 %48, -1
  store i32 %neg.48, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48), align 16
  %49 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 49), align 4
  %neg.49 = xor i32 %49, -1
  store i32 %neg.49, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 49), align 4
  %50 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 50), align 8
  %neg.50 = xor i32 %50, -1
  store i32 %neg.50, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 50), align 8
  %51 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 51), align 4
  %neg.51 = xor i32 %51, -1
  store i32 %neg.51, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 51), align 4
  %52 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52), align 16
  %neg.52 = xor i32 %52, -1
  store i32 %neg.52, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52), align 16
  %53 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 53), align 4
  %neg.53 = xor i32 %53, -1
  store i32 %neg.53, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 53), align 4
  %54 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 54), align 8
  %neg.54 = xor i32 %54, -1
  store i32 %neg.54, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 54), align 8
  %55 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 55), align 4
  %neg.55 = xor i32 %55, -1
  store i32 %neg.55, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 55), align 4
  %56 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56), align 16
  %neg.56 = xor i32 %56, -1
  store i32 %neg.56, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56), align 16
  %57 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 57), align 4
  %neg.57 = xor i32 %57, -1
  store i32 %neg.57, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 57), align 4
  %58 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 58), align 8
  %neg.58 = xor i32 %58, -1
  store i32 %neg.58, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 58), align 8
  %59 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 59), align 4
  %neg.59 = xor i32 %59, -1
  store i32 %neg.59, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 59), align 4
  %60 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60), align 16
  %neg.60 = xor i32 %60, -1
  store i32 %neg.60, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60), align 16
  %61 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 61), align 4
  %neg.61 = xor i32 %61, -1
  store i32 %neg.61, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 61), align 4
  %62 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 62), align 8
  %neg.62 = xor i32 %62, -1
  store i32 %neg.62, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 62), align 8
  %63 = load i32, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 63), align 4
  %neg.63 = xor i32 %63, -1
  store i32 %neg.63, i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 63), align 4
  br label %for.body5

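; Scalar check loop (%for.body5, latch %for.cond3): for each of the 64
; elements, recompute ~ib[i] and verify it matches the value the fully
; unrolled code above stored to ia[i]; any mismatch branches to %if.then
; and aborts.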
for.cond3:                                        ; preds = %for.body5
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp4 = icmp ult i64 %indvars.iv, 63
  br i1 %cmp4, label %for.body5, label %for.end14

for.body5:                                        ; preds = %entry, %for.cond3
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.cond3 ]
  %arrayidx7 = getelementptr inbounds [64 x i32], [64 x i32]* @ia, i64 0, i64 %indvars.iv
  %64 = load i32, i32* %arrayidx7, align 4
  %arrayidx9 = getelementptr inbounds [64 x i32], [64 x i32]* @ib, i64 0, i64 %indvars.iv
  %65 = load i32, i32* %arrayidx9, align 4
  %neg10 = xor i32 %65, -1
  %cmp11 = icmp eq i32 %64, %neg10
  br i1 %cmp11, label %for.cond3, label %if.then

if.then:                                          ; preds = %for.body5
  tail call void @abort() #2
  unreachable

for.end14:                                        ; preds = %for.cond3
  ret i32 0
}

declare void @abort() #2
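; For reference, a rough C equivalent of the function above, reconstructed
; from the IR (the original C source is not part of this file, so the
; function name below is hypothetical; @ia and @ib are the real globals):
;
;   int ia[64], ib[64];
;
;   int test_not(void) {
;     for (int i = 0; i < 64; i++)   // fully unrolled in the IR above
;       ia[i] = ~ib[i];
;     for (int i = 0; i < 64; i++)   // scalar check loop
;       if (ia[i] != ~ib[i])
;         abort();
;     return 0;
;   }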