; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Source/destination buffers, one pair per element width (i64, i32, i16, i8).
; Every array occupies 32 bytes (4 x 8, 8 x 4, 16 x 2, 32 x 1) and is declared
; with 32-byte alignment.
@src64 = common global [4 x i64] zeroinitializer, align 32
@dst64 = common global [4 x i64] zeroinitializer, align 32
@src32 = common global [8 x i32] zeroinitializer, align 32
@dst32 = common global [8 x i32] zeroinitializer, align 32
@src16 = common global [16 x i16] zeroinitializer, align 32
@dst16 = common global [16 x i16] zeroinitializer, align 32
@src8  = common global [32 x i8] zeroinitializer, align 32
@dst8  = common global [32 x i8] zeroinitializer, align 32

; Scalar count-leading-zeros intrinsics, one per element width used below.
; The trailing i1 operand is the intrinsic's "zero input is undefined" flag.
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)
declare i16 @llvm.ctlz.i16(i16, i1)
declare  i8 @llvm.ctlz.i8(i8, i1)

;
; CTLZ
;

; Loads elements 0-1 of @src64, applies llvm.ctlz.i64 to each with the i1 flag
; set to 0, and stores the results to elements 0-1 of @dst64.
; The autogenerated assertions use the prefix shared by all three run lines and
; expect the same scalar load/call/store sequence in the output, i.e. no
; vectorization of this pattern.
define void @ctlz_2i64() #0 {
; CHECK-LABEL: @ctlz_2i64(
; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
; CHECK-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; CHECK-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
; CHECK-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
  ret void
}

; Loads all four elements of @src64, applies llvm.ctlz.i64 to each with the i1
; flag set to 0, and stores the results to the matching elements of @dst64.
; The autogenerated assertions expect the scalar sequence to be emitted
; unchanged for every run line.
; NOTE(review): the i64 accesses here are marked align 4, whereas ctlz_2i64
; uses align 8 - presumably deliberate under-alignment; confirm before
; normalizing, and regenerate the assertions if it is ever changed.
define void @ctlz_4i64() #0 {
; CHECK-LABEL: @ctlz_4i64(
; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
; CHECK-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
; CHECK-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
; CHECK-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
; CHECK-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
; CHECK-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
; CHECK-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
; CHECK-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
; CHECK-NEXT:    ret void
;
  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
  %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 0)
  %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 0)
  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
  store i64 %ctlz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
  store i64 %ctlz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
  ret void
}

; Loads elements 0-3 of @src32, applies llvm.ctlz.i32 to each with the i1 flag
; set to 0, and stores the results to elements 0-3 of @dst32.
; The autogenerated assertions expect the scalar sequence to be emitted
; unchanged for every run line.
define void @ctlz_4i32() #0 {
; CHECK-LABEL: @ctlz_4i32(
; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
; CHECK-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
; CHECK-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
; CHECK-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
; CHECK-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
; CHECK-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
  ret void
}

; Loads all eight elements of @src32, applies llvm.ctlz.i32 to each with the
; i1 flag set to 0, and stores the results to the matching elements of @dst32.
; The autogenerated assertions expect the scalar sequence to be emitted
; unchanged for every run line.
; NOTE(review): the i32 accesses here are marked align 2, whereas ctlz_4i32
; uses align 4 - presumably deliberate under-alignment; confirm before
; normalizing, and regenerate the assertions if it is ever changed.
define void @ctlz_8i32() #0 {
; CHECK-LABEL: @ctlz_8i32(
; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
; CHECK-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
; CHECK-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
; CHECK-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
; CHECK-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
; CHECK-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 false)
; CHECK-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
; CHECK-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
; CHECK-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
; CHECK-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
; CHECK-NEXT:    store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
; CHECK-NEXT:    store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
; CHECK-NEXT:    store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
; CHECK-NEXT:    store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
; CHECK-NEXT:    ret void
;
  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
  %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 0)
  %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 0)
  %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 0)
  %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 0)
  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
  store i32 %ctlz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
  store i32 %ctlz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
  store i32 %ctlz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
  store i32 %ctlz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
  ret void
}

; Loads elements 0-7 of @src16, applies llvm.ctlz.i16 to each with the i1 flag
; set to 0, and stores the results to elements 0-7 of @dst16.
; The autogenerated assertions expect the scalar sequence to be emitted
; unchanged for every run line.
define void @ctlz_8i16() #0 {
; CHECK-LABEL: @ctlz_8i16(
; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
; CHECK-NEXT:    [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 false)
; CHECK-NEXT:    store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
; CHECK-NEXT:    store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
; CHECK-NEXT:    store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
; CHECK-NEXT:    store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
; CHECK-NEXT:    store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
; CHECK-NEXT:    store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
; CHECK-NEXT:    store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
; CHECK-NEXT:    store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
; CHECK-NEXT:    ret void
;
  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
  store i16 %ctlz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
  store i16 %ctlz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
  store i16 %ctlz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
  store i16 %ctlz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
  store i16 %ctlz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
  store i16 %ctlz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
  store i16 %ctlz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
  store i16 %ctlz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
  ret void
}

; Loads all sixteen elements of @src16, applies llvm.ctlz.i16 to each with the
; i1 flag set to 0, and stores the results to the matching elements of @dst16.
; The autogenerated assertions expect the scalar sequence to be emitted
; unchanged for every run line.
define void @ctlz_16i16() #0 {
; CHECK-LABEL: @ctlz_16i16(
; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
; CHECK-NEXT:    [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
; CHECK-NEXT:    [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
; CHECK-NEXT:    [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
; CHECK-NEXT:    [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
; CHECK-NEXT:    [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
; CHECK-NEXT:    [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
; CHECK-NEXT:    [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
; CHECK-NEXT:    [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
; CHECK-NEXT:    [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 false)
; CHECK-NEXT:    [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 false)
; CHECK-NEXT:    [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 false)
; CHECK-NEXT:    [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 false)
; CHECK-NEXT:    [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 false)
; CHECK-NEXT:    [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 false)
; CHECK-NEXT:    [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 false)
; CHECK-NEXT:    [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 false)
; CHECK-NEXT:    [[CTLZ8:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD8]], i1 false)
; CHECK-NEXT:    [[CTLZ9:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD9]], i1 false)
; CHECK-NEXT:    [[CTLZ10:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD10]], i1 false)
; CHECK-NEXT:    [[CTLZ11:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD11]], i1 false)
; CHECK-NEXT:    [[CTLZ12:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD12]], i1 false)
; CHECK-NEXT:    [[CTLZ13:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD13]], i1 false)
; CHECK-NEXT:    [[CTLZ14:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD14]], i1 false)
; CHECK-NEXT:    [[CTLZ15:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD15]], i1 false)
; CHECK-NEXT:    store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
; CHECK-NEXT:    store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
; CHECK-NEXT:    store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
; CHECK-NEXT:    store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
; CHECK-NEXT:    store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
; CHECK-NEXT:    store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
; CHECK-NEXT:    store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
; CHECK-NEXT:    store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
; CHECK-NEXT:    store i16 [[CTLZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
; CHECK-NEXT:    store i16 [[CTLZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
; CHECK-NEXT:    store i16 [[CTLZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
; CHECK-NEXT:    store i16 [[CTLZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
; CHECK-NEXT:    store i16 [[CTLZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
; CHECK-NEXT:    store i16 [[CTLZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
; CHECK-NEXT:    store i16 [[CTLZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
; CHECK-NEXT:    store i16 [[CTLZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
; CHECK-NEXT:    ret void
;
  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
  %ctlz0  = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
  %ctlz1  = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
  %ctlz2  = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
  %ctlz3  = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
  %ctlz4  = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
  %ctlz5  = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
  %ctlz6  = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
  %ctlz7  = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
  %ctlz8  = call i16 @llvm.ctlz.i16(i16 %ld8, i1 0)
  %ctlz9  = call i16 @llvm.ctlz.i16(i16 %ld9, i1 0)
  %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 0)
  %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 0)
  %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 0)
  %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 0)
  %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 0)
  %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 0)
  store i16 %ctlz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
  store i16 %ctlz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
  store i16 %ctlz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
  store i16 %ctlz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
  store i16 %ctlz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
  store i16 %ctlz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
  store i16 %ctlz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
  store i16 %ctlz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
  store i16 %ctlz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
  store i16 %ctlz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
  store i16 %ctlz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
  store i16 %ctlz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
  store i16 %ctlz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
  store i16 %ctlz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
  store i16 %ctlz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
  store i16 %ctlz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
  ret void
}

320define void @ctlz_16i8() #0 {
321; CHECK-LABEL: @ctlz_16i8(
322; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
323; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
324; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
325; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
326; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
327; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
328; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
329; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
330; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
331; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
332; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
333; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
334; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
335; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
336; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
337; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
338; CHECK-NEXT:    [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 false)
339; CHECK-NEXT:    [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 false)
340; CHECK-NEXT:    [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 false)
341; CHECK-NEXT:    [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 false)
342; CHECK-NEXT:    [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 false)
343; CHECK-NEXT:    [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 false)
344; CHECK-NEXT:    [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 false)
345; CHECK-NEXT:    [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 false)
346; CHECK-NEXT:    [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 false)
347; CHECK-NEXT:    [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 false)
348; CHECK-NEXT:    [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 false)
349; CHECK-NEXT:    [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 false)
350; CHECK-NEXT:    [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 false)
351; CHECK-NEXT:    [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 false)
352; CHECK-NEXT:    [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 false)
353; CHECK-NEXT:    [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 false)
354; CHECK-NEXT:    store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
355; CHECK-NEXT:    store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
356; CHECK-NEXT:    store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
357; CHECK-NEXT:    store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
358; CHECK-NEXT:    store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
359; CHECK-NEXT:    store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
360; CHECK-NEXT:    store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
361; CHECK-NEXT:    store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
362; CHECK-NEXT:    store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
363; CHECK-NEXT:    store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
364; CHECK-NEXT:    store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
365; CHECK-NEXT:    store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
366; CHECK-NEXT:    store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
367; CHECK-NEXT:    store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
368; CHECK-NEXT:    store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
369; CHECK-NEXT:    store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
370; CHECK-NEXT:    ret void
371;
372  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
373  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
374  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
375  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
376  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
377  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
378  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
379  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
380  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
381  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
382  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
383  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
384  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
385  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
386  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
387  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
388  %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
389  %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
390  %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
391  %ctlz3  = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
392  %ctlz4  = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
393  %ctlz5  = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
394  %ctlz6  = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
395  %ctlz7  = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
396  %ctlz8  = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
397  %ctlz9  = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
398  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
399  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
400  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
401  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
402  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
403  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
404  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
405  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
406  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
407  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
408  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
409  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
410  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
411  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
412  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
413  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
414  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
415  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
416  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
417  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
418  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
419  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
420  ret void
421}
422
; ctlz_32i8: scalar test input covering all 32 elements of @src8 / @dst8.
; Each i8 element is loaded (align 1), passed to @llvm.ctlz.i8 with the i1
; operand set to 0, and stored to the same index of @dst8.
; The FileCheck assertions below use the prefix shared by every RUN
; configuration and expect all 32 loads, calls and stores to stay scalar
; (the `i1 0` operand is printed back as `i1 false`).
423define void @ctlz_32i8() #0 {
424; CHECK-LABEL: @ctlz_32i8(
425; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
426; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
427; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
428; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
429; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
430; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
431; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
432; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
433; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
434; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
435; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
436; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
437; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
438; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
439; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
440; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
441; CHECK-NEXT:    [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
442; CHECK-NEXT:    [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
443; CHECK-NEXT:    [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
444; CHECK-NEXT:    [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
445; CHECK-NEXT:    [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
446; CHECK-NEXT:    [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
447; CHECK-NEXT:    [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
448; CHECK-NEXT:    [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
449; CHECK-NEXT:    [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
450; CHECK-NEXT:    [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
451; CHECK-NEXT:    [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
452; CHECK-NEXT:    [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
453; CHECK-NEXT:    [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
454; CHECK-NEXT:    [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
455; CHECK-NEXT:    [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
456; CHECK-NEXT:    [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
457; CHECK-NEXT:    [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 false)
458; CHECK-NEXT:    [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 false)
459; CHECK-NEXT:    [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 false)
460; CHECK-NEXT:    [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 false)
461; CHECK-NEXT:    [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 false)
462; CHECK-NEXT:    [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 false)
463; CHECK-NEXT:    [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 false)
464; CHECK-NEXT:    [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 false)
465; CHECK-NEXT:    [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 false)
466; CHECK-NEXT:    [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 false)
467; CHECK-NEXT:    [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 false)
468; CHECK-NEXT:    [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 false)
469; CHECK-NEXT:    [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 false)
470; CHECK-NEXT:    [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 false)
471; CHECK-NEXT:    [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 false)
472; CHECK-NEXT:    [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 false)
473; CHECK-NEXT:    [[CTLZ16:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD16]], i1 false)
474; CHECK-NEXT:    [[CTLZ17:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD17]], i1 false)
475; CHECK-NEXT:    [[CTLZ18:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD18]], i1 false)
476; CHECK-NEXT:    [[CTLZ19:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD19]], i1 false)
477; CHECK-NEXT:    [[CTLZ20:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD20]], i1 false)
478; CHECK-NEXT:    [[CTLZ21:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD21]], i1 false)
479; CHECK-NEXT:    [[CTLZ22:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD22]], i1 false)
480; CHECK-NEXT:    [[CTLZ23:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD23]], i1 false)
481; CHECK-NEXT:    [[CTLZ24:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD24]], i1 false)
482; CHECK-NEXT:    [[CTLZ25:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD25]], i1 false)
483; CHECK-NEXT:    [[CTLZ26:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD26]], i1 false)
484; CHECK-NEXT:    [[CTLZ27:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD27]], i1 false)
485; CHECK-NEXT:    [[CTLZ28:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD28]], i1 false)
486; CHECK-NEXT:    [[CTLZ29:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD29]], i1 false)
487; CHECK-NEXT:    [[CTLZ30:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD30]], i1 false)
488; CHECK-NEXT:    [[CTLZ31:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD31]], i1 false)
489; CHECK-NEXT:    store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
490; CHECK-NEXT:    store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
491; CHECK-NEXT:    store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
492; CHECK-NEXT:    store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
493; CHECK-NEXT:    store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
494; CHECK-NEXT:    store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
495; CHECK-NEXT:    store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
496; CHECK-NEXT:    store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
497; CHECK-NEXT:    store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
498; CHECK-NEXT:    store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
499; CHECK-NEXT:    store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
500; CHECK-NEXT:    store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
501; CHECK-NEXT:    store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
502; CHECK-NEXT:    store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
503; CHECK-NEXT:    store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
504; CHECK-NEXT:    store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
505; CHECK-NEXT:    store i8 [[CTLZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
506; CHECK-NEXT:    store i8 [[CTLZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
507; CHECK-NEXT:    store i8 [[CTLZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
508; CHECK-NEXT:    store i8 [[CTLZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
509; CHECK-NEXT:    store i8 [[CTLZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
510; CHECK-NEXT:    store i8 [[CTLZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
511; CHECK-NEXT:    store i8 [[CTLZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
512; CHECK-NEXT:    store i8 [[CTLZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
513; CHECK-NEXT:    store i8 [[CTLZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
514; CHECK-NEXT:    store i8 [[CTLZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
515; CHECK-NEXT:    store i8 [[CTLZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
516; CHECK-NEXT:    store i8 [[CTLZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
517; CHECK-NEXT:    store i8 [[CTLZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
518; CHECK-NEXT:    store i8 [[CTLZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
519; CHECK-NEXT:    store i8 [[CTLZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
520; CHECK-NEXT:    store i8 [[CTLZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
521; CHECK-NEXT:    ret void
522;
; Scalar loads of @src8 elements 0..31, in ascending index order.
523  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
524  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
525  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
526  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
527  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
528  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
529  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
530  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
531  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
532  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
533  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
534  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
535  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
536  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
537  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
538  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
539  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
540  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
541  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
542  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
543  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
544  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
545  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
546  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
547  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
548  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
549  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
550  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
551  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
552  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
553  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
554  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
; One @llvm.ctlz.i8 call per loaded element, all with the i1 operand set to 0.
555  %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
556  %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
557  %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
558  %ctlz3  = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
559  %ctlz4  = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
560  %ctlz5  = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
561  %ctlz6  = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
562  %ctlz7  = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
563  %ctlz8  = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
564  %ctlz9  = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
565  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
566  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
567  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
568  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
569  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
570  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
571  %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 0)
572  %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 0)
573  %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 0)
574  %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 0)
575  %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 0)
576  %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 0)
577  %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 0)
578  %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 0)
579  %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 0)
580  %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 0)
581  %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 0)
582  %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 0)
583  %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 0)
584  %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 0)
585  %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 0)
586  %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 0)
; Scalar stores to @dst8 elements 0..31, same index order as the loads.
587  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
588  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
589  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
590  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
591  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
592  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
593  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
594  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
595  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
596  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
597  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
598  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
599  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
600  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
601  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
602  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
603  store i8 %ctlz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
604  store i8 %ctlz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
605  store i8 %ctlz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
606  store i8 %ctlz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
607  store i8 %ctlz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
608  store i8 %ctlz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
609  store i8 %ctlz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
610  store i8 %ctlz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
611  store i8 %ctlz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
612  store i8 %ctlz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
613  store i8 %ctlz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
614  store i8 %ctlz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
615  store i8 %ctlz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
616  store i8 %ctlz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
617  store i8 %ctlz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
618  store i8 %ctlz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
619  ret void
620}
621
622;
623; CTLZ_ZERO_UNDEF
624;
625
; ctlz_undef_2i64: scalar test input over elements 0 and 1 of @src64 / @dst64.
; Both i64 values are loaded (align 8), passed to @llvm.ctlz.i64 with the i1
; operand set to -1 (printed back as `i1 true` in the assertions), and stored
; to the same index of @dst64.
; The FileCheck assertions use the prefix shared by every RUN configuration
; and expect the two loads, calls and stores to stay scalar.
626define void @ctlz_undef_2i64() #0 {
627; CHECK-LABEL: @ctlz_undef_2i64(
628; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
629; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
630; CHECK-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
631; CHECK-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
632; CHECK-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
633; CHECK-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
634; CHECK-NEXT:    ret void
635;
; Scalar loads of @src64 elements 0 and 1.
636  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
637  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
; One @llvm.ctlz.i64 call per element; `i1 -1` is the all-ones i1 constant.
638  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
639  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
; Scalar stores to the matching @dst64 elements.
640  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
641  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
642  ret void
643}
644
; ctlz_undef_4i64: scalar test input over all four elements of @src64 / @dst64.
; Each i64 value is loaded, passed to @llvm.ctlz.i64 with the i1 operand set
; to -1 (printed back as `i1 true` in the assertions), and stored to the same
; index of @dst64.
; The FileCheck assertions use the prefix shared by every RUN configuration
; and expect the four loads, calls and stores to stay scalar.
; NOTE(review): the accesses here use `align 4` and an `i64 0` first GEP index,
; whereas ctlz_undef_2i64 uses `align 8` and `i32 0` - unclear from this file
; whether the difference is intentional; confirm before normalizing.
645define void @ctlz_undef_4i64() #0 {
646; CHECK-LABEL: @ctlz_undef_4i64(
647; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
648; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
649; CHECK-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
650; CHECK-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
651; CHECK-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
652; CHECK-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
653; CHECK-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
654; CHECK-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
655; CHECK-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
656; CHECK-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
657; CHECK-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
658; CHECK-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
659; CHECK-NEXT:    ret void
660;
; Scalar loads of @src64 elements 0..3.
661  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
662  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
663  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
664  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
; One @llvm.ctlz.i64 call per element; `i1 -1` is the all-ones i1 constant.
665  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
666  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
667  %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 -1)
668  %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 -1)
; Scalar stores to the matching @dst64 elements.
669  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
670  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
671  store i64 %ctlz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
672  store i64 %ctlz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
673  ret void
674}
675
; ctlz_undef_4i32: scalar test input over elements 0..3 of @src32 / @dst32
; (the first half of the 8-element arrays).
; Each i32 value is loaded (align 4), passed to @llvm.ctlz.i32 with the i1
; operand set to -1 (printed back as `i1 true` in the assertions), and stored
; to the same index of @dst32.
; The FileCheck assertions use the prefix shared by every RUN configuration
; and expect the four loads, calls and stores to stay scalar.
676define void @ctlz_undef_4i32() #0 {
677; CHECK-LABEL: @ctlz_undef_4i32(
678; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
679; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
680; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
681; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
682; CHECK-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
683; CHECK-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
684; CHECK-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
685; CHECK-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
686; CHECK-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
687; CHECK-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
688; CHECK-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
689; CHECK-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
690; CHECK-NEXT:    ret void
691;
; Scalar loads of @src32 elements 0..3.
692  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
693  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
694  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
695  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
; One @llvm.ctlz.i32 call per element; `i1 -1` is the all-ones i1 constant.
696  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
697  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
698  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
699  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
; Scalar stores to the matching @dst32 elements.
700  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
701  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
702  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
703  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
704  ret void
705}
706
707define void @ctlz_undef_8i32() #0 {
708; CHECK-LABEL: @ctlz_undef_8i32(
709; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
710; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
711; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
712; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
713; CHECK-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
714; CHECK-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
715; CHECK-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
716; CHECK-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
717; CHECK-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
718; CHECK-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
719; CHECK-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
720; CHECK-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
721; CHECK-NEXT:    [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
722; CHECK-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
723; CHECK-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
724; CHECK-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
725; CHECK-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
726; CHECK-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
727; CHECK-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
728; CHECK-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
729; CHECK-NEXT:    store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
730; CHECK-NEXT:    store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
731; CHECK-NEXT:    store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
732; CHECK-NEXT:    store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
733; CHECK-NEXT:    ret void
734;
735  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
736  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
737  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
738  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
739  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
740  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
741  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
742  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
743  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
744  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
745  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
746  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
747  %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 -1)
748  %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 -1)
749  %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 -1)
750  %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 -1)
751  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
752  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
753  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
754  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
755  store i32 %ctlz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
756  store i32 %ctlz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
757  store i32 %ctlz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
758  store i32 %ctlz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
759  ret void
760}
761
; ctlz_undef_8i16: eight scalar llvm.ctlz.i16 calls over @src16[0..7], with the
; results stored to @dst16[0..7]. The second intrinsic argument is written as
; `i1 -1`, which appears as `i1 true` in the expected output; per the LLVM
; LangRef this flag makes the result undefined for a zero input.
; The assertions below expect the loads, calls and stores to remain scalar
; (no vector instructions) under the prefix shared by all opt invocations.
762define void @ctlz_undef_8i16() #0 {
763; CHECK-LABEL: @ctlz_undef_8i16(
764; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
765; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
766; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
767; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
768; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
769; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
770; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
771; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
772; CHECK-NEXT:    [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 true)
773; CHECK-NEXT:    [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 true)
774; CHECK-NEXT:    [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 true)
775; CHECK-NEXT:    [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 true)
776; CHECK-NEXT:    [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 true)
777; CHECK-NEXT:    [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 true)
778; CHECK-NEXT:    [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 true)
779; CHECK-NEXT:    [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 true)
780; CHECK-NEXT:    store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
781; CHECK-NEXT:    store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
782; CHECK-NEXT:    store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
783; CHECK-NEXT:    store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
784; CHECK-NEXT:    store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
785; CHECK-NEXT:    store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
786; CHECK-NEXT:    store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
787; CHECK-NEXT:    store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
788; CHECK-NEXT:    ret void
789;
; Input IR: load elements 0-7 of @src16 one at a time (each load is align 2).
790  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
791  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
792  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
793  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
794  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
795  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
796  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
797  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
; Count leading zeros of each loaded element; the flag operand is `i1 -1`
; in every call, so all eight calls are identical apart from their input.
798  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
799  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
800  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
801  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
802  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
803  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
804  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
805  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
; Store each result to the same index of @dst16 that its input came from.
806  store i16 %ctlz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
807  store i16 %ctlz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
808  store i16 %ctlz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
809  store i16 %ctlz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
810  store i16 %ctlz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
811  store i16 %ctlz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
812  store i16 %ctlz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
813  store i16 %ctlz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
814  ret void
815}
816
; ctlz_undef_16i16: sixteen scalar llvm.ctlz.i16 calls covering every element
; of the [16 x i16] global @src16, with results stored to the matching
; elements of @dst16. The flag operand is written as `i1 -1` and appears as
; `i1 true` in the expected output; per the LLVM LangRef this flag makes the
; result undefined for a zero input.
; The assertions below expect all sixteen loads, calls and stores to remain
; scalar (no vector instructions) under the prefix shared by all opt
; invocations.
817define void @ctlz_undef_16i16() #0 {
818; CHECK-LABEL: @ctlz_undef_16i16(
819; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
820; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
821; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
822; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
823; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
824; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
825; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
826; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
827; CHECK-NEXT:    [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
828; CHECK-NEXT:    [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
829; CHECK-NEXT:    [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
830; CHECK-NEXT:    [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
831; CHECK-NEXT:    [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
832; CHECK-NEXT:    [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
833; CHECK-NEXT:    [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
834; CHECK-NEXT:    [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
835; CHECK-NEXT:    [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 true)
836; CHECK-NEXT:    [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 true)
837; CHECK-NEXT:    [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 true)
838; CHECK-NEXT:    [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 true)
839; CHECK-NEXT:    [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 true)
840; CHECK-NEXT:    [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 true)
841; CHECK-NEXT:    [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 true)
842; CHECK-NEXT:    [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 true)
843; CHECK-NEXT:    [[CTLZ8:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD8]], i1 true)
844; CHECK-NEXT:    [[CTLZ9:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD9]], i1 true)
845; CHECK-NEXT:    [[CTLZ10:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD10]], i1 true)
846; CHECK-NEXT:    [[CTLZ11:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD11]], i1 true)
847; CHECK-NEXT:    [[CTLZ12:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD12]], i1 true)
848; CHECK-NEXT:    [[CTLZ13:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD13]], i1 true)
849; CHECK-NEXT:    [[CTLZ14:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD14]], i1 true)
850; CHECK-NEXT:    [[CTLZ15:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD15]], i1 true)
851; CHECK-NEXT:    store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
852; CHECK-NEXT:    store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
853; CHECK-NEXT:    store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
854; CHECK-NEXT:    store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
855; CHECK-NEXT:    store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
856; CHECK-NEXT:    store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
857; CHECK-NEXT:    store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
858; CHECK-NEXT:    store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
859; CHECK-NEXT:    store i16 [[CTLZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
860; CHECK-NEXT:    store i16 [[CTLZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
861; CHECK-NEXT:    store i16 [[CTLZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
862; CHECK-NEXT:    store i16 [[CTLZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
863; CHECK-NEXT:    store i16 [[CTLZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
864; CHECK-NEXT:    store i16 [[CTLZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
865; CHECK-NEXT:    store i16 [[CTLZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
866; CHECK-NEXT:    store i16 [[CTLZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
867; CHECK-NEXT:    ret void
868;
; Input IR: load elements 0-15 of @src16 one at a time (each load is align 2).
869  %ld0  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  0), align 2
870  %ld1  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  1), align 2
871  %ld2  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  2), align 2
872  %ld3  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  3), align 2
873  %ld4  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  4), align 2
874  %ld5  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  5), align 2
875  %ld6  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  6), align 2
876  %ld7  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  7), align 2
877  %ld8  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  8), align 2
878  %ld9  = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64  9), align 2
879  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
880  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
881  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
882  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
883  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
884  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
; Count leading zeros of each loaded element; the flag operand is `i1 -1`
; in every call, so all sixteen calls are identical apart from their input.
885  %ctlz0  = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
886  %ctlz1  = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
887  %ctlz2  = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
888  %ctlz3  = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
889  %ctlz4  = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
890  %ctlz5  = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
891  %ctlz6  = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
892  %ctlz7  = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
893  %ctlz8  = call i16 @llvm.ctlz.i16(i16 %ld8, i1 -1)
894  %ctlz9  = call i16 @llvm.ctlz.i16(i16 %ld9, i1 -1)
895  %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 -1)
896  %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 -1)
897  %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 -1)
898  %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 -1)
899  %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 -1)
900  %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 -1)
; Store each result to the same index of @dst16 that its input came from.
901  store i16 %ctlz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  0), align 2
902  store i16 %ctlz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  1), align 2
903  store i16 %ctlz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  2), align 2
904  store i16 %ctlz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  3), align 2
905  store i16 %ctlz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  4), align 2
906  store i16 %ctlz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  5), align 2
907  store i16 %ctlz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  6), align 2
908  store i16 %ctlz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  7), align 2
909  store i16 %ctlz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  8), align 2
910  store i16 %ctlz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64  9), align 2
911  store i16 %ctlz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
912  store i16 %ctlz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
913  store i16 %ctlz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
914  store i16 %ctlz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
915  store i16 %ctlz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
916  store i16 %ctlz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
917  ret void
918}
919
; ctlz_undef_16i8: sixteen scalar llvm.ctlz.i8 calls over elements 0-15 of the
; [32 x i8] global @src8 (the first half of the array), with results stored to
; elements 0-15 of @dst8. The flag operand is written as `i1 -1` and appears
; as `i1 true` in the expected output; per the LLVM LangRef this flag makes
; the result undefined for a zero input.
; The assertions below expect all sixteen loads, calls and stores to remain
; scalar (no vector instructions) under the prefix shared by all opt
; invocations.
920define void @ctlz_undef_16i8() #0 {
921; CHECK-LABEL: @ctlz_undef_16i8(
922; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
923; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
924; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
925; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
926; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
927; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
928; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
929; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
930; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
931; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
932; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
933; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
934; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
935; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
936; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
937; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
938; CHECK-NEXT:    [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 true)
939; CHECK-NEXT:    [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 true)
940; CHECK-NEXT:    [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 true)
941; CHECK-NEXT:    [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 true)
942; CHECK-NEXT:    [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 true)
943; CHECK-NEXT:    [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 true)
944; CHECK-NEXT:    [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 true)
945; CHECK-NEXT:    [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 true)
946; CHECK-NEXT:    [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 true)
947; CHECK-NEXT:    [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 true)
948; CHECK-NEXT:    [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 true)
949; CHECK-NEXT:    [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 true)
950; CHECK-NEXT:    [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 true)
951; CHECK-NEXT:    [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 true)
952; CHECK-NEXT:    [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 true)
953; CHECK-NEXT:    [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 true)
954; CHECK-NEXT:    store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
955; CHECK-NEXT:    store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
956; CHECK-NEXT:    store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
957; CHECK-NEXT:    store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
958; CHECK-NEXT:    store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
959; CHECK-NEXT:    store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
960; CHECK-NEXT:    store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
961; CHECK-NEXT:    store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
962; CHECK-NEXT:    store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
963; CHECK-NEXT:    store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
964; CHECK-NEXT:    store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
965; CHECK-NEXT:    store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
966; CHECK-NEXT:    store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
967; CHECK-NEXT:    store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
968; CHECK-NEXT:    store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
969; CHECK-NEXT:    store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
970; CHECK-NEXT:    ret void
971;
; Input IR: load elements 0-15 of @src8 one at a time (each load is align 1).
972  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
973  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
974  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
975  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
976  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
977  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
978  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
979  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
980  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
981  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
982  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
983  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
984  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
985  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
986  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
987  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
; Count leading zeros of each loaded byte; the flag operand is `i1 -1`
; in every call, so all sixteen calls are identical apart from their input.
988  %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
989  %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
990  %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
991  %ctlz3  = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
992  %ctlz4  = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
993  %ctlz5  = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
994  %ctlz6  = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
995  %ctlz7  = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
996  %ctlz8  = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
997  %ctlz9  = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
998  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
999  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
1000  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
1001  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
1002  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
1003  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
; Store each result to the same index of @dst8 that its input came from.
1004  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
1005  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
1006  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
1007  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
1008  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
1009  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
1010  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
1011  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
1012  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
1013  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
1014  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
1015  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
1016  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
1017  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
1018  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
1019  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
1020  ret void
1021}
1022
1023define void @ctlz_undef_32i8() #0 {
1024; CHECK-LABEL: @ctlz_undef_32i8(
1025; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
1026; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
1027; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
1028; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
1029; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
1030; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
1031; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
1032; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
1033; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
1034; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
1035; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
1036; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
1037; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
1038; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
1039; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
1040; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
1041; CHECK-NEXT:    [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
1042; CHECK-NEXT:    [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
1043; CHECK-NEXT:    [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
1044; CHECK-NEXT:    [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
1045; CHECK-NEXT:    [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
1046; CHECK-NEXT:    [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
1047; CHECK-NEXT:    [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
1048; CHECK-NEXT:    [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
1049; CHECK-NEXT:    [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
1050; CHECK-NEXT:    [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
1051; CHECK-NEXT:    [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
1052; CHECK-NEXT:    [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
1053; CHECK-NEXT:    [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
1054; CHECK-NEXT:    [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
1055; CHECK-NEXT:    [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
1056; CHECK-NEXT:    [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
1057; CHECK-NEXT:    [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 true)
1058; CHECK-NEXT:    [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 true)
1059; CHECK-NEXT:    [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 true)
1060; CHECK-NEXT:    [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 true)
1061; CHECK-NEXT:    [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 true)
1062; CHECK-NEXT:    [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 true)
1063; CHECK-NEXT:    [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 true)
1064; CHECK-NEXT:    [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 true)
1065; CHECK-NEXT:    [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 true)
1066; CHECK-NEXT:    [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 true)
1067; CHECK-NEXT:    [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 true)
1068; CHECK-NEXT:    [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 true)
1069; CHECK-NEXT:    [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 true)
1070; CHECK-NEXT:    [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 true)
1071; CHECK-NEXT:    [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 true)
1072; CHECK-NEXT:    [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 true)
1073; CHECK-NEXT:    [[CTLZ16:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD16]], i1 true)
1074; CHECK-NEXT:    [[CTLZ17:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD17]], i1 true)
1075; CHECK-NEXT:    [[CTLZ18:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD18]], i1 true)
1076; CHECK-NEXT:    [[CTLZ19:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD19]], i1 true)
1077; CHECK-NEXT:    [[CTLZ20:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD20]], i1 true)
1078; CHECK-NEXT:    [[CTLZ21:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD21]], i1 true)
1079; CHECK-NEXT:    [[CTLZ22:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD22]], i1 true)
1080; CHECK-NEXT:    [[CTLZ23:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD23]], i1 true)
1081; CHECK-NEXT:    [[CTLZ24:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD24]], i1 true)
1082; CHECK-NEXT:    [[CTLZ25:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD25]], i1 true)
1083; CHECK-NEXT:    [[CTLZ26:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD26]], i1 true)
1084; CHECK-NEXT:    [[CTLZ27:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD27]], i1 true)
1085; CHECK-NEXT:    [[CTLZ28:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD28]], i1 true)
1086; CHECK-NEXT:    [[CTLZ29:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD29]], i1 true)
1087; CHECK-NEXT:    [[CTLZ30:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD30]], i1 true)
1088; CHECK-NEXT:    [[CTLZ31:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD31]], i1 true)
1089; CHECK-NEXT:    store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
1090; CHECK-NEXT:    store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
1091; CHECK-NEXT:    store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
1092; CHECK-NEXT:    store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
1093; CHECK-NEXT:    store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
1094; CHECK-NEXT:    store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
1095; CHECK-NEXT:    store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
1096; CHECK-NEXT:    store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
1097; CHECK-NEXT:    store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
1098; CHECK-NEXT:    store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
1099; CHECK-NEXT:    store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
1100; CHECK-NEXT:    store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
1101; CHECK-NEXT:    store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
1102; CHECK-NEXT:    store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
1103; CHECK-NEXT:    store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
1104; CHECK-NEXT:    store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
1105; CHECK-NEXT:    store i8 [[CTLZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
1106; CHECK-NEXT:    store i8 [[CTLZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
1107; CHECK-NEXT:    store i8 [[CTLZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
1108; CHECK-NEXT:    store i8 [[CTLZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
1109; CHECK-NEXT:    store i8 [[CTLZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
1110; CHECK-NEXT:    store i8 [[CTLZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
1111; CHECK-NEXT:    store i8 [[CTLZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
1112; CHECK-NEXT:    store i8 [[CTLZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
1113; CHECK-NEXT:    store i8 [[CTLZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
1114; CHECK-NEXT:    store i8 [[CTLZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
1115; CHECK-NEXT:    store i8 [[CTLZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
1116; CHECK-NEXT:    store i8 [[CTLZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
1117; CHECK-NEXT:    store i8 [[CTLZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
1118; CHECK-NEXT:    store i8 [[CTLZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
1119; CHECK-NEXT:    store i8 [[CTLZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
1120; CHECK-NEXT:    store i8 [[CTLZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
1121; CHECK-NEXT:    ret void
1122;
1123  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  0), align 1
1124  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  1), align 1
1125  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  2), align 1
1126  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  3), align 1
1127  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  4), align 1
1128  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  5), align 1
1129  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  6), align 1
1130  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  7), align 1
1131  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  8), align 1
1132  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64  9), align 1
1133  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
1134  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
1135  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
1136  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
1137  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
1138  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
1139  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
1140  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
1141  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
1142  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
1143  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
1144  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
1145  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
1146  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
1147  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
1148  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
1149  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
1150  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
1151  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
1152  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
1153  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
1154  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
1155  %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
1156  %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
1157  %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
1158  %ctlz3  = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
1159  %ctlz4  = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
1160  %ctlz5  = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
1161  %ctlz6  = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
1162  %ctlz7  = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
1163  %ctlz8  = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
1164  %ctlz9  = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
1165  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
1166  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
1167  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
1168  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
1169  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
1170  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
1171  %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 -1)
1172  %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 -1)
1173  %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 -1)
1174  %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 -1)
1175  %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 -1)
1176  %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 -1)
1177  %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 -1)
1178  %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 -1)
1179  %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 -1)
1180  %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 -1)
1181  %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 -1)
1182  %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 -1)
1183  %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 -1)
1184  %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 -1)
1185  %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 -1)
1186  %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 -1)
1187  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  0), align 1
1188  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  1), align 1
1189  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  2), align 1
1190  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  3), align 1
1191  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  4), align 1
1192  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  5), align 1
1193  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  6), align 1
1194  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  7), align 1
1195  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  8), align 1
1196  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64  9), align 1
1197  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
1198  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
1199  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
1200  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
1201  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
1202  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
1203  store i8 %ctlz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
1204  store i8 %ctlz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
1205  store i8 %ctlz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
1206  store i8 %ctlz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
1207  store i8 %ctlz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
1208  store i8 %ctlz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
1209  store i8 %ctlz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
1210  store i8 %ctlz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
1211  store i8 %ctlz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
1212  store i8 %ctlz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
1213  store i8 %ctlz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
1214  store i8 %ctlz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
1215  store i8 %ctlz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
1216  store i8 %ctlz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
1217  store i8 %ctlz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
1218  store i8 %ctlz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
1219  ret void
1220}
1221
; Attribute group #0: applied via the `#0` suffix on the test functions in this
; file (e.g. `define void @ctlz_2i64() #0`). It contains only `nounwind`.
1222attributes #0 = { nounwind }
1223