; RUN: llc < %s -mtriple=arm64-unknown | FileCheck %s
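; Tests that chains of adjacent narrow (i8/i16) loads combined with shifts and
; ORs are folded into a single wide load, plus a byte reverse (rev) when the
; bytes are assembled in byte-swapped order.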

; i8* p; // p is 1 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_unaligned(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_unaligned:
; CHECK: ldr   w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p is 4 byte aligned
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8_aligned(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_aligned:
; CHECK: ldr    w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p is 4 byte aligned
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: ldr   w8, [x0]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 4
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = shl nuw nsw i32 %tmp2, 24
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 8
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = or i32 %tmp13, %tmp16
  ret i32 %tmp17
}

; i8* p; // p is 8 byte aligned
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: ldr   x0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp4 = load i8, i8* %tmp3, align 1
  %tmp5 = zext i8 %tmp4 to i64
  %tmp6 = shl nuw nsw i64 %tmp5, 8
  %tmp7 = or i64 %tmp6, %tmp2
  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp9 = load i8, i8* %tmp8, align 1
  %tmp10 = zext i8 %tmp9 to i64
  %tmp11 = shl nuw nsw i64 %tmp10, 16
  %tmp12 = or i64 %tmp7, %tmp11
  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp14 = load i8, i8* %tmp13, align 1
  %tmp15 = zext i8 %tmp14 to i64
  %tmp16 = shl nuw nsw i64 %tmp15, 24
  %tmp17 = or i64 %tmp12, %tmp16
  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp19 = load i8, i8* %tmp18, align 1
  %tmp20 = zext i8 %tmp19 to i64
  %tmp21 = shl nuw nsw i64 %tmp20, 32
  %tmp22 = or i64 %tmp17, %tmp21
  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp24 = load i8, i8* %tmp23, align 1
  %tmp25 = zext i8 %tmp24 to i64
  %tmp26 = shl nuw nsw i64 %tmp25, 40
  %tmp27 = or i64 %tmp22, %tmp26
  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp29 = load i8, i8* %tmp28, align 1
  %tmp30 = zext i8 %tmp29 to i64
  %tmp31 = shl nuw nsw i64 %tmp30, 48
  %tmp32 = or i64 %tmp27, %tmp31
  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp34 = load i8, i8* %tmp33, align 1
  %tmp35 = zext i8 %tmp34 to i64
  %tmp36 = shl nuw i64 %tmp35, 56
  %tmp37 = or i64 %tmp32, %tmp36
  ret i64 %tmp37
}

; i8* p; // p is 8 byte aligned
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64* %arg) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: ldr   x8, [x0]
; CHECK-NEXT: rev x0, x8
; CHECK-NEXT: ret
  %tmp = bitcast i64* %arg to i8*
  %tmp1 = load i8, i8* %tmp, align 8
  %tmp2 = zext i8 %tmp1 to i64
  %tmp3 = shl nuw i64 %tmp2, 56
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i64
  %tmp7 = shl nuw nsw i64 %tmp6, 48
  %tmp8 = or i64 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i64
  %tmp12 = shl nuw nsw i64 %tmp11, 40
  %tmp13 = or i64 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i64
  %tmp17 = shl nuw nsw i64 %tmp16, 32
  %tmp18 = or i64 %tmp13, %tmp17
  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
  %tmp20 = load i8, i8* %tmp19, align 1
  %tmp21 = zext i8 %tmp20 to i64
  %tmp22 = shl nuw nsw i64 %tmp21, 24
  %tmp23 = or i64 %tmp18, %tmp22
  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
  %tmp25 = load i8, i8* %tmp24, align 1
  %tmp26 = zext i8 %tmp25 to i64
  %tmp27 = shl nuw nsw i64 %tmp26, 16
  %tmp28 = or i64 %tmp23, %tmp27
  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
  %tmp30 = load i8, i8* %tmp29, align 1
  %tmp31 = zext i8 %tmp30 to i64
  %tmp32 = shl nuw nsw i64 %tmp31, 8
  %tmp33 = or i64 %tmp28, %tmp32
  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
  %tmp35 = load i8, i8* %tmp34, align 1
  %tmp36 = zext i8 %tmp35 to i64
  %tmp37 = or i64 %tmp33, %tmp36
  ret i64 %tmp37
}

; i8* p; // p[1] is 4 byte aligned
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
; CHECK: ldur w0, [x0, #1]
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[-4] is 4 byte aligned
; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset:
; CHECK: ldur w0, [x0, #-4]
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp2 = load i8, i8* %tmp1, align 4
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp15 = load i8, i8* %tmp14, align 1
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[1] is 4 byte aligned
; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
; CHECK: ldur  w8, [x0, #1]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp15 = load i8, i8* %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

; i8* p; // p[-4] is 4 byte aligned
; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
; CHECK: ldur  w8, [x0, #-4]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
  %tmp10 = load i8, i8* %tmp9, align 1
  %tmp11 = zext i8 %tmp10 to i32
  %tmp12 = shl nuw nsw i32 %tmp11, 16
  %tmp13 = or i32 %tmp8, %tmp12
  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
  %tmp15 = load i8, i8* %tmp14, align 4
  %tmp16 = zext i8 %tmp15 to i32
  %tmp17 = shl nuw nsw i32 %tmp16, 24
  %tmp18 = or i32 %tmp13, %tmp17
  ret i32 %tmp18
}

declare i16 @llvm.bswap.i16(i16)

; i16* p; // p is 4 byte aligned
; (i32) bswap(p[1]) | ((i32) bswap(p[0]) << 16)
define i32 @load_i32_by_bswap_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_bswap_i16:
; CHECK: ldr    w8, [x0]
; CHECK-NEXT: rev w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
  %tmp2 = zext i16 %tmp11 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
  %tmp5 = zext i16 %tmp41 to i32
  %tmp6 = shl nuw nsw i32 %tmp2, 16
  %tmp7 = or i32 %tmp6, %tmp5
  ret i32 %tmp7
}

; i16* p; // p is 4 byte aligned
; (i32) p[0] | ((i32) sext(p[1]) << 16)
define i32 @load_i32_by_sext_i16(i32* %arg) {
; CHECK-LABEL: load_i32_by_sext_i16:
; CHECK: ldr   w0, [x0]
; CHECK-NEXT: ret
  %tmp = bitcast i32* %arg to i16*
  %tmp1 = load i16, i16* %tmp, align 4
  %tmp2 = zext i16 %tmp1 to i32
  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
  %tmp4 = load i16, i16* %tmp3, align 1
  %tmp5 = sext i16 %tmp4 to i32
  %tmp6 = shl nuw nsw i32 %tmp5, 16
  %tmp7 = or i32 %tmp6, %tmp2
  ret i32 %tmp7
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index:
; CHECK: add x8, x0, w1, uxtw
; CHECK-NEXT: ldr w0, [x8, #12]
; CHECK-NEXT: ret
  %tmp = add nuw nsw i32 %i, 3
  %tmp2 = add nuw nsw i32 %i, 2
  %tmp3 = add nuw nsw i32 %i, 1
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = zext i32 %i to i64
  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
  %tmp7 = load i8, i8* %tmp6, align 4
  %tmp8 = zext i8 %tmp7 to i32
  %tmp9 = zext i32 %tmp3 to i64
  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
  %tmp11 = load i8, i8* %tmp10, align 1
  %tmp12 = zext i8 %tmp11 to i32
  %tmp13 = shl nuw nsw i32 %tmp12, 8
  %tmp14 = or i32 %tmp13, %tmp8
  %tmp15 = zext i32 %tmp2 to i64
  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
  %tmp17 = load i8, i8* %tmp16, align 1
  %tmp18 = zext i8 %tmp17 to i32
  %tmp19 = shl nuw nsw i32 %tmp18, 16
  %tmp20 = or i32 %tmp14, %tmp19
  %tmp21 = zext i32 %tmp to i64
  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
  %tmp23 = load i8, i8* %tmp22, align 1
  %tmp24 = zext i8 %tmp23 to i32
  %tmp25 = shl nuw i32 %tmp24, 24
  %tmp26 = or i32 %tmp20, %tmp25
  ret i32 %tmp26
}

; i8* arg; i32 i;
; p = arg + 12;
; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
; CHECK: add x8, x0, w1, uxtw
; CHECK-NEXT: ldur  w0, [x8, #13]
; CHECK-NEXT: ret
  %tmp = add nuw nsw i32 %i, 4
  %tmp2 = add nuw nsw i32 %i, 3
  %tmp3 = add nuw nsw i32 %i, 2
  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
  %tmp5 = add nuw nsw i32 %i, 1
  %tmp27 = zext i32 %tmp5 to i64
  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
  %tmp29 = load i8, i8* %tmp28, align 4
  %tmp30 = zext i8 %tmp29 to i32
  %tmp31 = zext i32 %tmp3 to i64
  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
  %tmp33 = load i8, i8* %tmp32, align 1
  %tmp34 = zext i8 %tmp33 to i32
  %tmp35 = shl nuw nsw i32 %tmp34, 8
  %tmp36 = or i32 %tmp35, %tmp30
  %tmp37 = zext i32 %tmp2 to i64
  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
  %tmp39 = load i8, i8* %tmp38, align 1
  %tmp40 = zext i8 %tmp39 to i32
  %tmp41 = shl nuw nsw i32 %tmp40, 16
  %tmp42 = or i32 %tmp36, %tmp41
  %tmp43 = zext i32 %tmp to i64
  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
  %tmp45 = load i8, i8* %tmp44, align 1
  %tmp46 = zext i8 %tmp45 to i32
  %tmp47 = shl nuw i32 %tmp46, 24
  %tmp48 = or i32 %tmp42, %tmp47
  ret i32 %tmp48
}

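; The following patterns combine only two of the four bytes; as the CHECK
; lines show, they are lowered to two ldrb loads merged with lsl/bfi rather
; than a single wide load.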
; i8* p; // p is 2 byte aligned
; (i32) p[0] | ((i32) p[1] << 8)
define i32 @zext_load_i32_by_i8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8:
; CHECK: ldrb  w8, [x0]
; CHECK-NEXT: ldrb  w9, [x0, #1]
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: mov  w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[0] << 8) | ((i32) p[1] << 16)
define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
; CHECK: ldrb  w8, [x0]
; CHECK-NEXT: ldrb  w9, [x0, #1]
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[0] << 16) | ((i32) p[1] << 24)
define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
; CHECK: ldrb  w8, [x0]
; CHECK-NEXT: ldrb  w9, [x0, #1]
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp2 = load i8, i8* %tmp1, align 2
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp5 = load i8, i8* %tmp4, align 1
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK: ldrb  w8, [x0, #1]
; CHECK-NEXT: ldrb    w9, [x0]
; CHECK-NEXT: bfi w8, w9, #8, #8
; CHECK-NEXT: mov  w0, w8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8
  %tmp8 = or i32 %tmp7, %tmp3
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK: ldrb  w8, [x0, #1]
; CHECK-NEXT: ldrb    w9, [x0]
; CHECK-NEXT: lsl w0, w8, #8
; CHECK-NEXT: bfi w0, w9, #16, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}

; i8* p; // p is 2 byte aligned
; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK: ldrb  w8, [x0, #1]
; CHECK-NEXT: ldrb    w9, [x0]
; CHECK-NEXT: lsl w0, w8, #16
; CHECK-NEXT: bfi w0, w9, #24, #8
; CHECK-NEXT: ret

  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
  %tmp2 = load i8, i8* %tmp1, align 1
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0
  %tmp5 = load i8, i8* %tmp4, align 2
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24
  %tmp8 = or i32 %tmp7, %tmp30
  ret i32 %tmp8
}