1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm-unknown | FileCheck %s
3; RUN: llc < %s -mtriple=armv6-unknown | FileCheck %s --check-prefix=CHECK-ARMv6
4; RUN: llc < %s -mtriple=thumbv6m-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv6
5; RUN: llc < %s -mtriple=thumbv7m-none-eabi | FileCheck %s --check-prefix=CHECK-THUMBv7
6
7; i8* p; // p is 1 byte aligned
8; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
9define i32 @load_i32_by_i8_unaligned(i32* %arg) {
10; CHECK-LABEL: load_i32_by_i8_unaligned:
11; CHECK:       @ %bb.0:
12; CHECK-NEXT:    ldrb r2, [r0, #1]
13; CHECK-NEXT:    ldrb r1, [r0]
14; CHECK-NEXT:    ldrb r3, [r0, #2]
15; CHECK-NEXT:    ldrb r0, [r0, #3]
16; CHECK-NEXT:    orr r1, r1, r2, lsl #8
17; CHECK-NEXT:    orr r1, r1, r3, lsl #16
18; CHECK-NEXT:    orr r0, r1, r0, lsl #24
19; CHECK-NEXT:    mov pc, lr
20;
21; CHECK-ARMv6-LABEL: load_i32_by_i8_unaligned:
22; CHECK-ARMv6:       @ %bb.0:
23; CHECK-ARMv6-NEXT:    ldrb r2, [r0, #1]
24; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
25; CHECK-ARMv6-NEXT:    ldrb r3, [r0, #2]
26; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #3]
27; CHECK-ARMv6-NEXT:    orr r1, r1, r2, lsl #8
28; CHECK-ARMv6-NEXT:    orr r1, r1, r3, lsl #16
29; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #24
30; CHECK-ARMv6-NEXT:    bx lr
31;
32; CHECK-THUMBv6-LABEL: load_i32_by_i8_unaligned:
33; CHECK-THUMBv6:       @ %bb.0:
34; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
35; CHECK-THUMBv6-NEXT:    ldrb r2, [r0, #1]
36; CHECK-THUMBv6-NEXT:    lsls r2, r2, #8
37; CHECK-THUMBv6-NEXT:    adds r1, r2, r1
38; CHECK-THUMBv6-NEXT:    ldrb r2, [r0, #2]
39; CHECK-THUMBv6-NEXT:    lsls r2, r2, #16
40; CHECK-THUMBv6-NEXT:    adds r1, r1, r2
41; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #3]
42; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
43; CHECK-THUMBv6-NEXT:    adds r0, r1, r0
44; CHECK-THUMBv6-NEXT:    bx lr
45;
46; CHECK-THUMBv7-LABEL: load_i32_by_i8_unaligned:
47; CHECK-THUMBv7:       @ %bb.0:
48; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
49; CHECK-THUMBv7-NEXT:    bx lr
50
51  %tmp = bitcast i32* %arg to i8*
52  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
53  %tmp2 = load i8, i8* %tmp1, align 1
54  %tmp3 = zext i8 %tmp2 to i32
55  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
56  %tmp5 = load i8, i8* %tmp4, align 1
57  %tmp6 = zext i8 %tmp5 to i32
58  %tmp7 = shl nuw nsw i32 %tmp6, 8
59  %tmp8 = or i32 %tmp7, %tmp3
60  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
61  %tmp10 = load i8, i8* %tmp9, align 1
62  %tmp11 = zext i8 %tmp10 to i32
63  %tmp12 = shl nuw nsw i32 %tmp11, 16
64  %tmp13 = or i32 %tmp8, %tmp12
65  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
66  %tmp15 = load i8, i8* %tmp14, align 1
67  %tmp16 = zext i8 %tmp15 to i32
68  %tmp17 = shl nuw nsw i32 %tmp16, 24
69  %tmp18 = or i32 %tmp13, %tmp17
70  ret i32 %tmp18
71}
72
73; i8* p; // p is 4 byte aligned
74; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
75define i32 @load_i32_by_i8_aligned(i32* %arg) {
76; CHECK-LABEL: load_i32_by_i8_aligned:
77; CHECK:       @ %bb.0:
78; CHECK-NEXT:    ldr r0, [r0]
79; CHECK-NEXT:    mov pc, lr
80;
81; CHECK-ARMv6-LABEL: load_i32_by_i8_aligned:
82; CHECK-ARMv6:       @ %bb.0:
83; CHECK-ARMv6-NEXT:    ldr r0, [r0]
84; CHECK-ARMv6-NEXT:    bx lr
85;
86; CHECK-THUMBv6-LABEL: load_i32_by_i8_aligned:
87; CHECK-THUMBv6:       @ %bb.0:
88; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
89; CHECK-THUMBv6-NEXT:    bx lr
90;
91; CHECK-THUMBv7-LABEL: load_i32_by_i8_aligned:
92; CHECK-THUMBv7:       @ %bb.0:
93; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
94; CHECK-THUMBv7-NEXT:    bx lr
95
96  %tmp = bitcast i32* %arg to i8*
97  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
98  %tmp2 = load i8, i8* %tmp1, align 4
99  %tmp3 = zext i8 %tmp2 to i32
100  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
101  %tmp5 = load i8, i8* %tmp4, align 1
102  %tmp6 = zext i8 %tmp5 to i32
103  %tmp7 = shl nuw nsw i32 %tmp6, 8
104  %tmp8 = or i32 %tmp7, %tmp3
105  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
106  %tmp10 = load i8, i8* %tmp9, align 1
107  %tmp11 = zext i8 %tmp10 to i32
108  %tmp12 = shl nuw nsw i32 %tmp11, 16
109  %tmp13 = or i32 %tmp8, %tmp12
110  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
111  %tmp15 = load i8, i8* %tmp14, align 1
112  %tmp16 = zext i8 %tmp15 to i32
113  %tmp17 = shl nuw nsw i32 %tmp16, 24
114  %tmp18 = or i32 %tmp13, %tmp17
115  ret i32 %tmp18
116}
117
118; i8* p; // p is 4 byte aligned
119; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
120define i32 @load_i32_by_i8_bswap(i32* %arg) {
121; BSWAP is not supported by 32 bit target
122; CHECK-LABEL: load_i32_by_i8_bswap:
123; CHECK:       @ %bb.0:
124; CHECK-NEXT:    ldr r0, [r0]
125; CHECK-NEXT:    mov r1, #65280
126; CHECK-NEXT:    mov r2, #16711680
127; CHECK-NEXT:    and r1, r1, r0, lsr #8
128; CHECK-NEXT:    and r2, r2, r0, lsl #8
129; CHECK-NEXT:    orr r1, r1, r0, lsr #24
130; CHECK-NEXT:    orr r0, r2, r0, lsl #24
131; CHECK-NEXT:    orr r0, r0, r1
132; CHECK-NEXT:    mov pc, lr
133;
134; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap:
135; CHECK-ARMv6:       @ %bb.0:
136; CHECK-ARMv6-NEXT:    ldr r0, [r0]
137; CHECK-ARMv6-NEXT:    rev r0, r0
138; CHECK-ARMv6-NEXT:    bx lr
139;
140; CHECK-THUMBv6-LABEL: load_i32_by_i8_bswap:
141; CHECK-THUMBv6:       @ %bb.0:
142; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
143; CHECK-THUMBv6-NEXT:    rev r0, r0
144; CHECK-THUMBv6-NEXT:    bx lr
145;
146; CHECK-THUMBv7-LABEL: load_i32_by_i8_bswap:
147; CHECK-THUMBv7:       @ %bb.0:
148; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
149; CHECK-THUMBv7-NEXT:    rev r0, r0
150; CHECK-THUMBv7-NEXT:    bx lr
151
152  %tmp = bitcast i32* %arg to i8*
153  %tmp1 = load i8, i8* %tmp, align 4
154  %tmp2 = zext i8 %tmp1 to i32
155  %tmp3 = shl nuw nsw i32 %tmp2, 24
156  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
157  %tmp5 = load i8, i8* %tmp4, align 1
158  %tmp6 = zext i8 %tmp5 to i32
159  %tmp7 = shl nuw nsw i32 %tmp6, 16
160  %tmp8 = or i32 %tmp7, %tmp3
161  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
162  %tmp10 = load i8, i8* %tmp9, align 1
163  %tmp11 = zext i8 %tmp10 to i32
164  %tmp12 = shl nuw nsw i32 %tmp11, 8
165  %tmp13 = or i32 %tmp8, %tmp12
166  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 3
167  %tmp15 = load i8, i8* %tmp14, align 1
168  %tmp16 = zext i8 %tmp15 to i32
169  %tmp17 = or i32 %tmp13, %tmp16
170  ret i32 %tmp17
171}
172
173; i8* p; // p is 8 byte aligned
174; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
175define i64 @load_i64_by_i8(i64* %arg) {
176; CHECK-LABEL: load_i64_by_i8:
177; CHECK:       @ %bb.0:
178; CHECK-NEXT:    ldr r2, [r0]
179; CHECK-NEXT:    ldr r1, [r0, #4]
180; CHECK-NEXT:    mov r0, r2
181; CHECK-NEXT:    mov pc, lr
182;
183; CHECK-ARMv6-LABEL: load_i64_by_i8:
184; CHECK-ARMv6:       @ %bb.0:
185; CHECK-ARMv6-NEXT:    ldrd r0, r1, [r0]
186; CHECK-ARMv6-NEXT:    bx lr
187;
188; CHECK-THUMBv6-LABEL: load_i64_by_i8:
189; CHECK-THUMBv6:       @ %bb.0:
190; CHECK-THUMBv6-NEXT:    ldr r2, [r0]
191; CHECK-THUMBv6-NEXT:    ldr r1, [r0, #4]
192; CHECK-THUMBv6-NEXT:    mov r0, r2
193; CHECK-THUMBv6-NEXT:    bx lr
194;
195; CHECK-THUMBv7-LABEL: load_i64_by_i8:
196; CHECK-THUMBv7:       @ %bb.0:
197; CHECK-THUMBv7-NEXT:    ldrd r0, r1, [r0]
198; CHECK-THUMBv7-NEXT:    bx lr
199
200  %tmp = bitcast i64* %arg to i8*
201  %tmp1 = load i8, i8* %tmp, align 8
202  %tmp2 = zext i8 %tmp1 to i64
203  %tmp3 = getelementptr inbounds i8, i8* %tmp, i64 1
204  %tmp4 = load i8, i8* %tmp3, align 1
205  %tmp5 = zext i8 %tmp4 to i64
206  %tmp6 = shl nuw nsw i64 %tmp5, 8
207  %tmp7 = or i64 %tmp6, %tmp2
208  %tmp8 = getelementptr inbounds i8, i8* %tmp, i64 2
209  %tmp9 = load i8, i8* %tmp8, align 1
210  %tmp10 = zext i8 %tmp9 to i64
211  %tmp11 = shl nuw nsw i64 %tmp10, 16
212  %tmp12 = or i64 %tmp7, %tmp11
213  %tmp13 = getelementptr inbounds i8, i8* %tmp, i64 3
214  %tmp14 = load i8, i8* %tmp13, align 1
215  %tmp15 = zext i8 %tmp14 to i64
216  %tmp16 = shl nuw nsw i64 %tmp15, 24
217  %tmp17 = or i64 %tmp12, %tmp16
218  %tmp18 = getelementptr inbounds i8, i8* %tmp, i64 4
219  %tmp19 = load i8, i8* %tmp18, align 1
220  %tmp20 = zext i8 %tmp19 to i64
221  %tmp21 = shl nuw nsw i64 %tmp20, 32
222  %tmp22 = or i64 %tmp17, %tmp21
223  %tmp23 = getelementptr inbounds i8, i8* %tmp, i64 5
224  %tmp24 = load i8, i8* %tmp23, align 1
225  %tmp25 = zext i8 %tmp24 to i64
226  %tmp26 = shl nuw nsw i64 %tmp25, 40
227  %tmp27 = or i64 %tmp22, %tmp26
228  %tmp28 = getelementptr inbounds i8, i8* %tmp, i64 6
229  %tmp29 = load i8, i8* %tmp28, align 1
230  %tmp30 = zext i8 %tmp29 to i64
231  %tmp31 = shl nuw nsw i64 %tmp30, 48
232  %tmp32 = or i64 %tmp27, %tmp31
233  %tmp33 = getelementptr inbounds i8, i8* %tmp, i64 7
234  %tmp34 = load i8, i8* %tmp33, align 1
235  %tmp35 = zext i8 %tmp34 to i64
236  %tmp36 = shl nuw i64 %tmp35, 56
237  %tmp37 = or i64 %tmp32, %tmp36
238  ret i64 %tmp37
239}
240
241; i8* p; // p is 8 byte aligned
242; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
243define i64 @load_i64_by_i8_bswap(i64* %arg) {
244; CHECK-LABEL: load_i64_by_i8_bswap:
245; CHECK:       @ %bb.0:
246; CHECK-NEXT:    push {r11, lr}
247; CHECK-NEXT:    ldr r1, [r0]
248; CHECK-NEXT:    mov r12, #65280
249; CHECK-NEXT:    ldr r0, [r0, #4]
250; CHECK-NEXT:    mov lr, #16711680
251; CHECK-NEXT:    and r3, r12, r0, lsr #8
252; CHECK-NEXT:    and r2, lr, r0, lsl #8
253; CHECK-NEXT:    orr r3, r3, r0, lsr #24
254; CHECK-NEXT:    orr r0, r2, r0, lsl #24
255; CHECK-NEXT:    and r2, r12, r1, lsr #8
256; CHECK-NEXT:    orr r0, r0, r3
257; CHECK-NEXT:    and r3, lr, r1, lsl #8
258; CHECK-NEXT:    orr r2, r2, r1, lsr #24
259; CHECK-NEXT:    orr r1, r3, r1, lsl #24
260; CHECK-NEXT:    orr r1, r1, r2
261; CHECK-NEXT:    pop {r11, lr}
262; CHECK-NEXT:    mov pc, lr
263;
264; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap:
265; CHECK-ARMv6:       @ %bb.0:
266; CHECK-ARMv6-NEXT:    ldrd r2, r3, [r0]
267; CHECK-ARMv6-NEXT:    rev r0, r3
268; CHECK-ARMv6-NEXT:    rev r1, r2
269; CHECK-ARMv6-NEXT:    bx lr
270;
271; CHECK-THUMBv6-LABEL: load_i64_by_i8_bswap:
272; CHECK-THUMBv6:       @ %bb.0:
273; CHECK-THUMBv6-NEXT:    ldr r1, [r0]
274; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #4]
275; CHECK-THUMBv6-NEXT:    rev r0, r0
276; CHECK-THUMBv6-NEXT:    rev r1, r1
277; CHECK-THUMBv6-NEXT:    bx lr
278;
279; CHECK-THUMBv7-LABEL: load_i64_by_i8_bswap:
280; CHECK-THUMBv7:       @ %bb.0:
281; CHECK-THUMBv7-NEXT:    ldrd r1, r0, [r0]
282; CHECK-THUMBv7-NEXT:    rev r0, r0
283; CHECK-THUMBv7-NEXT:    rev r1, r1
284; CHECK-THUMBv7-NEXT:    bx lr
285
286  %tmp = bitcast i64* %arg to i8*
287  %tmp1 = load i8, i8* %tmp, align 8
288  %tmp2 = zext i8 %tmp1 to i64
289  %tmp3 = shl nuw i64 %tmp2, 56
290  %tmp4 = getelementptr inbounds i8, i8* %tmp, i64 1
291  %tmp5 = load i8, i8* %tmp4, align 1
292  %tmp6 = zext i8 %tmp5 to i64
293  %tmp7 = shl nuw nsw i64 %tmp6, 48
294  %tmp8 = or i64 %tmp7, %tmp3
295  %tmp9 = getelementptr inbounds i8, i8* %tmp, i64 2
296  %tmp10 = load i8, i8* %tmp9, align 1
297  %tmp11 = zext i8 %tmp10 to i64
298  %tmp12 = shl nuw nsw i64 %tmp11, 40
299  %tmp13 = or i64 %tmp8, %tmp12
300  %tmp14 = getelementptr inbounds i8, i8* %tmp, i64 3
301  %tmp15 = load i8, i8* %tmp14, align 1
302  %tmp16 = zext i8 %tmp15 to i64
303  %tmp17 = shl nuw nsw i64 %tmp16, 32
304  %tmp18 = or i64 %tmp13, %tmp17
305  %tmp19 = getelementptr inbounds i8, i8* %tmp, i64 4
306  %tmp20 = load i8, i8* %tmp19, align 1
307  %tmp21 = zext i8 %tmp20 to i64
308  %tmp22 = shl nuw nsw i64 %tmp21, 24
309  %tmp23 = or i64 %tmp18, %tmp22
310  %tmp24 = getelementptr inbounds i8, i8* %tmp, i64 5
311  %tmp25 = load i8, i8* %tmp24, align 1
312  %tmp26 = zext i8 %tmp25 to i64
313  %tmp27 = shl nuw nsw i64 %tmp26, 16
314  %tmp28 = or i64 %tmp23, %tmp27
315  %tmp29 = getelementptr inbounds i8, i8* %tmp, i64 6
316  %tmp30 = load i8, i8* %tmp29, align 1
317  %tmp31 = zext i8 %tmp30 to i64
318  %tmp32 = shl nuw nsw i64 %tmp31, 8
319  %tmp33 = or i64 %tmp28, %tmp32
320  %tmp34 = getelementptr inbounds i8, i8* %tmp, i64 7
321  %tmp35 = load i8, i8* %tmp34, align 1
322  %tmp36 = zext i8 %tmp35 to i64
323  %tmp37 = or i64 %tmp33, %tmp36
324  ret i64 %tmp37
325}
326
327; i8* p; // p[1] is 4 byte aligned
328; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
329define i32 @load_i32_by_i8_nonzero_offset(i32* %arg) {
330; CHECK-LABEL: load_i32_by_i8_nonzero_offset:
331; CHECK:       @ %bb.0:
332; CHECK-NEXT:    ldr r0, [r0, #1]
333; CHECK-NEXT:    mov pc, lr
334;
335; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset:
336; CHECK-ARMv6:       @ %bb.0:
337; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
338; CHECK-ARMv6-NEXT:    bx lr
339;
340; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset:
341; CHECK-THUMBv6:       @ %bb.0:
342; CHECK-THUMBv6-NEXT:    movs r1, #1
343; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
344; CHECK-THUMBv6-NEXT:    bx lr
345;
346; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset:
347; CHECK-THUMBv7:       @ %bb.0:
348; CHECK-THUMBv7-NEXT:    ldr.w r0, [r0, #1]
349; CHECK-THUMBv7-NEXT:    bx lr
350
351
352  %tmp = bitcast i32* %arg to i8*
353  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1
354  %tmp2 = load i8, i8* %tmp1, align 4
355  %tmp3 = zext i8 %tmp2 to i32
356  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 2
357  %tmp5 = load i8, i8* %tmp4, align 1
358  %tmp6 = zext i8 %tmp5 to i32
359  %tmp7 = shl nuw nsw i32 %tmp6, 8
360  %tmp8 = or i32 %tmp7, %tmp3
361  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 3
362  %tmp10 = load i8, i8* %tmp9, align 1
363  %tmp11 = zext i8 %tmp10 to i32
364  %tmp12 = shl nuw nsw i32 %tmp11, 16
365  %tmp13 = or i32 %tmp8, %tmp12
366  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 4
367  %tmp15 = load i8, i8* %tmp14, align 1
368  %tmp16 = zext i8 %tmp15 to i32
369  %tmp17 = shl nuw nsw i32 %tmp16, 24
370  %tmp18 = or i32 %tmp13, %tmp17
371  ret i32 %tmp18
372}
373
374; i8* p; // p[-4] is 4 byte aligned
375; (i32) p[-4] | ((i32) p[-3] << 8) | ((i32) p[-2] << 16) | ((i32) p[-1] << 24)
376define i32 @load_i32_by_i8_neg_offset(i32* %arg) {
377; CHECK-LABEL: load_i32_by_i8_neg_offset:
378; CHECK:       @ %bb.0:
379; CHECK-NEXT:    ldr r0, [r0, #-4]
380; CHECK-NEXT:    mov pc, lr
381;
382; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset:
383; CHECK-ARMv6:       @ %bb.0:
384; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
385; CHECK-ARMv6-NEXT:    bx lr
386;
387; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset:
388; CHECK-THUMBv6:       @ %bb.0:
389; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
390; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
391; CHECK-THUMBv6-NEXT:    bx lr
392;
393; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset:
394; CHECK-THUMBv7:       @ %bb.0:
395; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #-4]
396; CHECK-THUMBv7-NEXT:    bx lr
397
398
399  %tmp = bitcast i32* %arg to i8*
400  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -4
401  %tmp2 = load i8, i8* %tmp1, align 4
402  %tmp3 = zext i8 %tmp2 to i32
403  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -3
404  %tmp5 = load i8, i8* %tmp4, align 1
405  %tmp6 = zext i8 %tmp5 to i32
406  %tmp7 = shl nuw nsw i32 %tmp6, 8
407  %tmp8 = or i32 %tmp7, %tmp3
408  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -2
409  %tmp10 = load i8, i8* %tmp9, align 1
410  %tmp11 = zext i8 %tmp10 to i32
411  %tmp12 = shl nuw nsw i32 %tmp11, 16
412  %tmp13 = or i32 %tmp8, %tmp12
413  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -1
414  %tmp15 = load i8, i8* %tmp14, align 1
415  %tmp16 = zext i8 %tmp15 to i32
416  %tmp17 = shl nuw nsw i32 %tmp16, 24
417  %tmp18 = or i32 %tmp13, %tmp17
418  ret i32 %tmp18
419}
420
421; i8* p; // p[1] is 4 byte aligned
422; (i32) p[4] | ((i32) p[3] << 8) | ((i32) p[2] << 16) | ((i32) p[1] << 24)
423define i32 @load_i32_by_i8_nonzero_offset_bswap(i32* %arg) {
424; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap:
425; CHECK:       @ %bb.0:
426; CHECK-NEXT:    ldr r0, [r0, #1]
427; CHECK-NEXT:    mov r1, #65280
428; CHECK-NEXT:    mov r2, #16711680
429; CHECK-NEXT:    and r1, r1, r0, lsr #8
430; CHECK-NEXT:    and r2, r2, r0, lsl #8
431; CHECK-NEXT:    orr r1, r1, r0, lsr #24
432; CHECK-NEXT:    orr r0, r2, r0, lsl #24
433; CHECK-NEXT:    orr r0, r0, r1
434; CHECK-NEXT:    mov pc, lr
435;
436; CHECK-ARMv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
437; CHECK-ARMv6:       @ %bb.0:
438; CHECK-ARMv6-NEXT:    ldr r0, [r0, #1]
439; CHECK-ARMv6-NEXT:    rev r0, r0
440; CHECK-ARMv6-NEXT:    bx lr
441;
442; CHECK-THUMBv6-LABEL: load_i32_by_i8_nonzero_offset_bswap:
443; CHECK-THUMBv6:       @ %bb.0:
444; CHECK-THUMBv6-NEXT:    movs r1, #1
445; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
446; CHECK-THUMBv6-NEXT:    rev r0, r0
447; CHECK-THUMBv6-NEXT:    bx lr
448;
449; CHECK-THUMBv7-LABEL: load_i32_by_i8_nonzero_offset_bswap:
450; CHECK-THUMBv7:       @ %bb.0:
451; CHECK-THUMBv7-NEXT:    ldr.w r0, [r0, #1]
452; CHECK-THUMBv7-NEXT:    rev r0, r0
453; CHECK-THUMBv7-NEXT:    bx lr
454
455
456  %tmp = bitcast i32* %arg to i8*
457  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 4
458  %tmp2 = load i8, i8* %tmp1, align 1
459  %tmp3 = zext i8 %tmp2 to i32
460  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 3
461  %tmp5 = load i8, i8* %tmp4, align 1
462  %tmp6 = zext i8 %tmp5 to i32
463  %tmp7 = shl nuw nsw i32 %tmp6, 8
464  %tmp8 = or i32 %tmp7, %tmp3
465  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 2
466  %tmp10 = load i8, i8* %tmp9, align 1
467  %tmp11 = zext i8 %tmp10 to i32
468  %tmp12 = shl nuw nsw i32 %tmp11, 16
469  %tmp13 = or i32 %tmp8, %tmp12
470  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 1
471  %tmp15 = load i8, i8* %tmp14, align 4
472  %tmp16 = zext i8 %tmp15 to i32
473  %tmp17 = shl nuw nsw i32 %tmp16, 24
474  %tmp18 = or i32 %tmp13, %tmp17
475  ret i32 %tmp18
476}
477
478; i8* p; // p[-4] is 4 byte aligned
479; (i32) p[-1] | ((i32) p[-2] << 8) | ((i32) p[-3] << 16) | ((i32) p[-4] << 24)
480define i32 @load_i32_by_i8_neg_offset_bswap(i32* %arg) {
481; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap:
482; CHECK:       @ %bb.0:
483; CHECK-NEXT:    ldr r0, [r0, #-4]
484; CHECK-NEXT:    mov r1, #65280
485; CHECK-NEXT:    mov r2, #16711680
486; CHECK-NEXT:    and r1, r1, r0, lsr #8
487; CHECK-NEXT:    and r2, r2, r0, lsl #8
488; CHECK-NEXT:    orr r1, r1, r0, lsr #24
489; CHECK-NEXT:    orr r0, r2, r0, lsl #24
490; CHECK-NEXT:    orr r0, r0, r1
491; CHECK-NEXT:    mov pc, lr
492;
493; CHECK-ARMv6-LABEL: load_i32_by_i8_neg_offset_bswap:
494; CHECK-ARMv6:       @ %bb.0:
495; CHECK-ARMv6-NEXT:    ldr r0, [r0, #-4]
496; CHECK-ARMv6-NEXT:    rev r0, r0
497; CHECK-ARMv6-NEXT:    bx lr
498;
499; CHECK-THUMBv6-LABEL: load_i32_by_i8_neg_offset_bswap:
500; CHECK-THUMBv6:       @ %bb.0:
501; CHECK-THUMBv6-NEXT:    subs r0, r0, #4
502; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
503; CHECK-THUMBv6-NEXT:    rev r0, r0
504; CHECK-THUMBv6-NEXT:    bx lr
505;
506; CHECK-THUMBv7-LABEL: load_i32_by_i8_neg_offset_bswap:
507; CHECK-THUMBv7:       @ %bb.0:
508; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #-4]
509; CHECK-THUMBv7-NEXT:    rev r0, r0
510; CHECK-THUMBv7-NEXT:    bx lr
511
512
513  %tmp = bitcast i32* %arg to i8*
514  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 -1
515  %tmp2 = load i8, i8* %tmp1, align 1
516  %tmp3 = zext i8 %tmp2 to i32
517  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 -2
518  %tmp5 = load i8, i8* %tmp4, align 1
519  %tmp6 = zext i8 %tmp5 to i32
520  %tmp7 = shl nuw nsw i32 %tmp6, 8
521  %tmp8 = or i32 %tmp7, %tmp3
522  %tmp9 = getelementptr inbounds i8, i8* %tmp, i32 -3
523  %tmp10 = load i8, i8* %tmp9, align 1
524  %tmp11 = zext i8 %tmp10 to i32
525  %tmp12 = shl nuw nsw i32 %tmp11, 16
526  %tmp13 = or i32 %tmp8, %tmp12
527  %tmp14 = getelementptr inbounds i8, i8* %tmp, i32 -4
528  %tmp15 = load i8, i8* %tmp14, align 4
529  %tmp16 = zext i8 %tmp15 to i32
530  %tmp17 = shl nuw nsw i32 %tmp16, 24
531  %tmp18 = or i32 %tmp13, %tmp17
532  ret i32 %tmp18
533}
534
535declare i16 @llvm.bswap.i16(i16)
536
537; i16* p; // p is 4 byte aligned
; ((i32) bswap(p[0]) << 16) | (i32) bswap(p[1])
539define i32 @load_i32_by_bswap_i16(i32* %arg) {
540; CHECK-LABEL: load_i32_by_bswap_i16:
541; CHECK:       @ %bb.0:
542; CHECK-NEXT:    ldr r0, [r0]
543; CHECK-NEXT:    mov r1, #65280
544; CHECK-NEXT:    mov r2, #16711680
545; CHECK-NEXT:    and r1, r1, r0, lsr #8
546; CHECK-NEXT:    and r2, r2, r0, lsl #8
547; CHECK-NEXT:    orr r1, r1, r0, lsr #24
548; CHECK-NEXT:    orr r0, r2, r0, lsl #24
549; CHECK-NEXT:    orr r0, r0, r1
550; CHECK-NEXT:    mov pc, lr
551;
552; CHECK-ARMv6-LABEL: load_i32_by_bswap_i16:
553; CHECK-ARMv6:       @ %bb.0:
554; CHECK-ARMv6-NEXT:    ldr r0, [r0]
555; CHECK-ARMv6-NEXT:    rev r0, r0
556; CHECK-ARMv6-NEXT:    bx lr
557;
558; CHECK-THUMBv6-LABEL: load_i32_by_bswap_i16:
559; CHECK-THUMBv6:       @ %bb.0:
560; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
561; CHECK-THUMBv6-NEXT:    rev r0, r0
562; CHECK-THUMBv6-NEXT:    bx lr
563;
564; CHECK-THUMBv7-LABEL: load_i32_by_bswap_i16:
565; CHECK-THUMBv7:       @ %bb.0:
566; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
567; CHECK-THUMBv7-NEXT:    rev r0, r0
568; CHECK-THUMBv7-NEXT:    bx lr
569
570
571  %tmp = bitcast i32* %arg to i16*
572  %tmp1 = load i16, i16* %tmp, align 4
573  %tmp11 = call i16 @llvm.bswap.i16(i16 %tmp1)
574  %tmp2 = zext i16 %tmp11 to i32
575  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
576  %tmp4 = load i16, i16* %tmp3, align 1
577  %tmp41 = call i16 @llvm.bswap.i16(i16 %tmp4)
578  %tmp5 = zext i16 %tmp41 to i32
579  %tmp6 = shl nuw nsw i32 %tmp2, 16
580  %tmp7 = or i32 %tmp6, %tmp5
581  ret i32 %tmp7
582}
583
584; i16* p;
; (i32) p[0] | ((i32) sext(p[1]) << 16)
586define i32 @load_i32_by_sext_i16(i32* %arg) {
587; CHECK-LABEL: load_i32_by_sext_i16:
588; CHECK:       @ %bb.0:
589; CHECK-NEXT:    ldr r0, [r0]
590; CHECK-NEXT:    mov pc, lr
591;
592; CHECK-ARMv6-LABEL: load_i32_by_sext_i16:
593; CHECK-ARMv6:       @ %bb.0:
594; CHECK-ARMv6-NEXT:    ldr r0, [r0]
595; CHECK-ARMv6-NEXT:    bx lr
596;
597; CHECK-THUMBv6-LABEL: load_i32_by_sext_i16:
598; CHECK-THUMBv6:       @ %bb.0:
599; CHECK-THUMBv6-NEXT:    ldr r0, [r0]
600; CHECK-THUMBv6-NEXT:    bx lr
601;
602; CHECK-THUMBv7-LABEL: load_i32_by_sext_i16:
603; CHECK-THUMBv7:       @ %bb.0:
604; CHECK-THUMBv7-NEXT:    ldr r0, [r0]
605; CHECK-THUMBv7-NEXT:    bx lr
606  %tmp = bitcast i32* %arg to i16*
607  %tmp1 = load i16, i16* %tmp, align 4
608  %tmp2 = zext i16 %tmp1 to i32
609  %tmp3 = getelementptr inbounds i16, i16* %tmp, i32 1
610  %tmp4 = load i16, i16* %tmp3, align 1
611  %tmp5 = sext i16 %tmp4 to i32
612  %tmp6 = shl nuw nsw i32 %tmp5, 16
613  %tmp7 = or i32 %tmp6, %tmp2
614  ret i32 %tmp7
615}
616
617; i8* arg; i32 i;
618; p = arg + 12;
619; (i32) p[i] | ((i32) p[i + 1] << 8) | ((i32) p[i + 2] << 16) | ((i32) p[i + 3] << 24)
620define i32 @load_i32_by_i8_base_offset_index(i8* %arg, i32 %i) {
621; CHECK-LABEL: load_i32_by_i8_base_offset_index:
622; CHECK:       @ %bb.0:
623; CHECK-NEXT:    add r0, r0, r1
624; CHECK-NEXT:    ldr r0, [r0, #12]
625; CHECK-NEXT:    mov pc, lr
626;
627; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index:
628; CHECK-ARMv6:       @ %bb.0:
629; CHECK-ARMv6-NEXT:    add r0, r0, r1
630; CHECK-ARMv6-NEXT:    ldr r0, [r0, #12]
631; CHECK-ARMv6-NEXT:    bx lr
632;
633; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index:
634; CHECK-THUMBv6:       @ %bb.0:
635; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
636; CHECK-THUMBv6-NEXT:    ldr r0, [r0, #12]
637; CHECK-THUMBv6-NEXT:    bx lr
638;
639; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index:
640; CHECK-THUMBv7:       @ %bb.0:
641; CHECK-THUMBv7-NEXT:    add r0, r1
642; CHECK-THUMBv7-NEXT:    ldr r0, [r0, #12]
643; CHECK-THUMBv7-NEXT:    bx lr
644
645  %tmp = add nuw nsw i32 %i, 3
646  %tmp2 = add nuw nsw i32 %i, 2
647  %tmp3 = add nuw nsw i32 %i, 1
648  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
649  %tmp5 = zext i32 %i to i64
650  %tmp6 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp5
651  %tmp7 = load i8, i8* %tmp6, align 4
652  %tmp8 = zext i8 %tmp7 to i32
653  %tmp9 = zext i32 %tmp3 to i64
654  %tmp10 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp9
655  %tmp11 = load i8, i8* %tmp10, align 1
656  %tmp12 = zext i8 %tmp11 to i32
657  %tmp13 = shl nuw nsw i32 %tmp12, 8
658  %tmp14 = or i32 %tmp13, %tmp8
659  %tmp15 = zext i32 %tmp2 to i64
660  %tmp16 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp15
661  %tmp17 = load i8, i8* %tmp16, align 1
662  %tmp18 = zext i8 %tmp17 to i32
663  %tmp19 = shl nuw nsw i32 %tmp18, 16
664  %tmp20 = or i32 %tmp14, %tmp19
665  %tmp21 = zext i32 %tmp to i64
666  %tmp22 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp21
667  %tmp23 = load i8, i8* %tmp22, align 1
668  %tmp24 = zext i8 %tmp23 to i32
669  %tmp25 = shl nuw i32 %tmp24, 24
670  %tmp26 = or i32 %tmp20, %tmp25
671  ret i32 %tmp26
672}
673
674; i8* arg; i32 i;
675; p = arg + 12;
676; (i32) p[i + 1] | ((i32) p[i + 2] << 8) | ((i32) p[i + 3] << 16) | ((i32) p[i + 4] << 24)
677define i32 @load_i32_by_i8_base_offset_index_2(i8* %arg, i32 %i) {
678; CHECK-LABEL: load_i32_by_i8_base_offset_index_2:
679; CHECK:       @ %bb.0:
680; CHECK-NEXT:    add r0, r1, r0
681; CHECK-NEXT:    ldr r0, [r0, #13]
682; CHECK-NEXT:    mov pc, lr
683;
684; CHECK-ARMv6-LABEL: load_i32_by_i8_base_offset_index_2:
685; CHECK-ARMv6:       @ %bb.0:
686; CHECK-ARMv6-NEXT:    add r0, r1, r0
687; CHECK-ARMv6-NEXT:    ldr r0, [r0, #13]
688; CHECK-ARMv6-NEXT:    bx lr
689;
690; CHECK-THUMBv6-LABEL: load_i32_by_i8_base_offset_index_2:
691; CHECK-THUMBv6:       @ %bb.0:
692; CHECK-THUMBv6-NEXT:    adds r0, r1, r0
693; CHECK-THUMBv6-NEXT:    movs r1, #13
694; CHECK-THUMBv6-NEXT:    ldr r0, [r0, r1]
695; CHECK-THUMBv6-NEXT:    bx lr
696;
697; CHECK-THUMBv7-LABEL: load_i32_by_i8_base_offset_index_2:
698; CHECK-THUMBv7:       @ %bb.0:
699; CHECK-THUMBv7-NEXT:    add r0, r1
700; CHECK-THUMBv7-NEXT:    ldr.w r0, [r0, #13]
701; CHECK-THUMBv7-NEXT:    bx lr
702  %tmp = add nuw nsw i32 %i, 4
703  %tmp2 = add nuw nsw i32 %i, 3
704  %tmp3 = add nuw nsw i32 %i, 2
705  %tmp4 = getelementptr inbounds i8, i8* %arg, i64 12
706  %tmp5 = add nuw nsw i32 %i, 1
707  %tmp27 = zext i32 %tmp5 to i64
708  %tmp28 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp27
709  %tmp29 = load i8, i8* %tmp28, align 4
710  %tmp30 = zext i8 %tmp29 to i32
711  %tmp31 = zext i32 %tmp3 to i64
712  %tmp32 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp31
713  %tmp33 = load i8, i8* %tmp32, align 1
714  %tmp34 = zext i8 %tmp33 to i32
715  %tmp35 = shl nuw nsw i32 %tmp34, 8
716  %tmp36 = or i32 %tmp35, %tmp30
717  %tmp37 = zext i32 %tmp2 to i64
718  %tmp38 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp37
719  %tmp39 = load i8, i8* %tmp38, align 1
720  %tmp40 = zext i8 %tmp39 to i32
721  %tmp41 = shl nuw nsw i32 %tmp40, 16
722  %tmp42 = or i32 %tmp36, %tmp41
723  %tmp43 = zext i32 %tmp to i64
724  %tmp44 = getelementptr inbounds i8, i8* %tmp4, i64 %tmp43
725  %tmp45 = load i8, i8* %tmp44, align 1
726  %tmp46 = zext i8 %tmp45 to i32
727  %tmp47 = shl nuw i32 %tmp46, 24
728  %tmp48 = or i32 %tmp42, %tmp47
729  ret i32 %tmp48
730}
731
732; i8* p; // p is 2 byte aligned
733; (i32) p[0] | ((i32) p[1] << 8)
734define i32 @zext_load_i32_by_i8(i32* %arg) {
735; CHECK-LABEL: zext_load_i32_by_i8:
736; CHECK:       @ %bb.0:
737; CHECK-NEXT:    ldrh r0, [r0]
738; CHECK-NEXT:    mov pc, lr
739;
740; CHECK-ARMv6-LABEL: zext_load_i32_by_i8:
741; CHECK-ARMv6:       @ %bb.0:
742; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
743; CHECK-ARMv6-NEXT:    bx lr
744;
745; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8:
746; CHECK-THUMBv6:       @ %bb.0:
747; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
748; CHECK-THUMBv6-NEXT:    bx lr
749;
750; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8:
751; CHECK-THUMBv7:       @ %bb.0:
752; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
753; CHECK-THUMBv7-NEXT:    bx lr
754
755  %tmp = bitcast i32* %arg to i8*
756  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
757  %tmp2 = load i8, i8* %tmp1, align 2
758  %tmp3 = zext i8 %tmp2 to i32
759  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
760  %tmp5 = load i8, i8* %tmp4, align 1
761  %tmp6 = zext i8 %tmp5 to i32
762  %tmp7 = shl nuw nsw i32 %tmp6, 8
763  %tmp8 = or i32 %tmp7, %tmp3
764  ret i32 %tmp8
765}
766
767; i8* p; // p is 2 byte aligned
768; ((i32) p[0] << 8) | ((i32) p[1] << 16)
769define i32 @zext_load_i32_by_i8_shl_8(i32* %arg) {
770; CHECK-LABEL: zext_load_i32_by_i8_shl_8:
771; CHECK:       @ %bb.0:
772; CHECK-NEXT:    ldrb r1, [r0]
773; CHECK-NEXT:    ldrb r0, [r0, #1]
774; CHECK-NEXT:    lsl r0, r0, #16
775; CHECK-NEXT:    orr r0, r0, r1, lsl #8
776; CHECK-NEXT:    mov pc, lr
777;
778; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_8:
779; CHECK-ARMv6:       @ %bb.0:
780; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
781; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
782; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
783; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #8
784; CHECK-ARMv6-NEXT:    bx lr
785;
786; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_8:
787; CHECK-THUMBv6:       @ %bb.0:
788; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
789; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
790; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
791; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
792; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
793; CHECK-THUMBv6-NEXT:    bx lr
794;
795; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_8:
796; CHECK-THUMBv7:       @ %bb.0:
797; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
798; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
799; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
800; CHECK-THUMBv7-NEXT:    orr.w r0, r0, r1, lsl #8
801; CHECK-THUMBv7-NEXT:    bx lr
802
803  %tmp = bitcast i32* %arg to i8*
804  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
805  %tmp2 = load i8, i8* %tmp1, align 2
806  %tmp3 = zext i8 %tmp2 to i32
807  %tmp30 = shl nuw nsw i32 %tmp3, 8
808  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
809  %tmp5 = load i8, i8* %tmp4, align 1
810  %tmp6 = zext i8 %tmp5 to i32
811  %tmp7 = shl nuw nsw i32 %tmp6, 16
812  %tmp8 = or i32 %tmp7, %tmp30
813  ret i32 %tmp8
814}
815
816; i8* p; // p is 2 byte aligned
817; ((i32) p[0] << 16) | ((i32) p[1] << 24)
818define i32 @zext_load_i32_by_i8_shl_16(i32* %arg) {
819; CHECK-LABEL: zext_load_i32_by_i8_shl_16:
820; CHECK:       @ %bb.0:
821; CHECK-NEXT:    ldrb r1, [r0]
822; CHECK-NEXT:    ldrb r0, [r0, #1]
823; CHECK-NEXT:    lsl r0, r0, #24
824; CHECK-NEXT:    orr r0, r0, r1, lsl #16
825; CHECK-NEXT:    mov pc, lr
826;
827; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_shl_16:
828; CHECK-ARMv6:       @ %bb.0:
829; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
830; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
831; CHECK-ARMv6-NEXT:    lsl r0, r0, #24
832; CHECK-ARMv6-NEXT:    orr r0, r0, r1, lsl #16
833; CHECK-ARMv6-NEXT:    bx lr
834;
835; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_shl_16:
836; CHECK-THUMBv6:       @ %bb.0:
837; CHECK-THUMBv6-NEXT:    ldrb r1, [r0]
838; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
839; CHECK-THUMBv6-NEXT:    ldrb r0, [r0, #1]
840; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
841; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
842; CHECK-THUMBv6-NEXT:    bx lr
843;
844; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_shl_16:
845; CHECK-THUMBv7:       @ %bb.0:
846; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
847; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
848; CHECK-THUMBv7-NEXT:    lsls r0, r0, #24
849; CHECK-THUMBv7-NEXT:    orr.w r0, r0, r1, lsl #16
850; CHECK-THUMBv7-NEXT:    bx lr
851
852  %tmp = bitcast i32* %arg to i8*
853  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 0
854  %tmp2 = load i8, i8* %tmp1, align 2
855  %tmp3 = zext i8 %tmp2 to i32
856  %tmp30 = shl nuw nsw i32 %tmp3, 16
857  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 1
858  %tmp5 = load i8, i8* %tmp4, align 1
859  %tmp6 = zext i8 %tmp5 to i32
860  %tmp7 = shl nuw nsw i32 %tmp6, 24
861  %tmp8 = or i32 %tmp7, %tmp30
862  ret i32 %tmp8
863}
864
865; i8* p; // p is 2 byte aligned
866; (i32) p[1] | ((i32) p[0] << 8)
define i32 @zext_load_i32_by_i8_bswap(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    ldrb r0, [r0, #1]
; CHECK-NEXT:    orr r0, r0, r1, lsl #8
; CHECK-NEXT:    mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-ARMv6:       @ %bb.0:
; CHECK-ARMv6-NEXT:    ldrh r0, [r0]
; CHECK-ARMv6-NEXT:    lsl r0, r0, #16
; CHECK-ARMv6-NEXT:    rev r0, r0
; CHECK-ARMv6-NEXT:    bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv6:       @ %bb.0:
; CHECK-THUMBv6-NEXT:    ldrh r0, [r0]
; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
; CHECK-THUMBv6-NEXT:    rev r0, r0
; CHECK-THUMBv6-NEXT:    bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap:
; CHECK-THUMBv7:       @ %bb.0:
; CHECK-THUMBv7-NEXT:    ldrh r0, [r0]
; CHECK-THUMBv7-NEXT:    lsls r0, r0, #16
; CHECK-THUMBv7-NEXT:    rev r0, r0
; CHECK-THUMBv7-NEXT:    bx lr

; NOTE(review): on the v6/v6m/v7m targets the CHECK lines show the two byte
; loads combined into a single ldrh followed by lsl #16 + rev (a byte-swapped
; halfword load); the baseline ARM run instead keeps two ldrb plus an orr.
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1  ; &p[1]
  %tmp2 = load i8, i8* %tmp1, align 1                 ; p[1]
  %tmp3 = zext i8 %tmp2 to i32
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0  ; &p[0]
  %tmp5 = load i8, i8* %tmp4, align 2                 ; p[0] (2-byte aligned)
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 8                    ; p[0] << 8
  %tmp8 = or i32 %tmp7, %tmp3                         ; p[1] | (p[0] << 8)
  ret i32 %tmp8
}
907
908; i8* p; // p is 2 byte aligned
909; ((i32) p[1] << 8) | ((i32) p[0] << 16)
define i32 @zext_load_i32_by_i8_bswap_shl_8(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    ldrb r0, [r0, #1]
; CHECK-NEXT:    lsl r1, r1, #16
; CHECK-NEXT:    orr r0, r1, r0, lsl #8
; CHECK-NEXT:    mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-ARMv6:       @ %bb.0:
; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
; CHECK-ARMv6-NEXT:    lsl r1, r1, #16
; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #8
; CHECK-ARMv6-NEXT:    bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-THUMBv6:       @ %bb.0:
; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
; CHECK-THUMBv6-NEXT:    lsls r1, r1, #8
; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
; CHECK-THUMBv6-NEXT:    lsls r0, r0, #16
; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
; CHECK-THUMBv6-NEXT:    bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_8:
; CHECK-THUMBv7:       @ %bb.0:
; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
; CHECK-THUMBv7-NEXT:    lsls r1, r1, #16
; CHECK-THUMBv7-NEXT:    orr.w r0, r1, r0, lsl #8
; CHECK-THUMBv7-NEXT:    bx lr

; NOTE(review): unlike the unshifted bswap case above, the CHECK lines here
; show two separate ldrb loads on every tested target — the byte-swapped pair
; shifted left by 8 is not combined into a single wider load.
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1  ; &p[1]
  %tmp2 = load i8, i8* %tmp1, align 1                 ; p[1]
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 8                   ; p[1] << 8
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0  ; &p[0]
  %tmp5 = load i8, i8* %tmp4, align 2                 ; p[0] (2-byte aligned)
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 16                   ; p[0] << 16
  %tmp8 = or i32 %tmp7, %tmp30                        ; (p[1]<<8) | (p[0]<<16)
  ret i32 %tmp8
}
956
957; i8* p; // p is 2 byte aligned
958; ((i32) p[1] << 16) | ((i32) p[0] << 24)
define i32 @zext_load_i32_by_i8_bswap_shl_16(i32* %arg) {
; CHECK-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    ldrb r1, [r0]
; CHECK-NEXT:    ldrb r0, [r0, #1]
; CHECK-NEXT:    lsl r1, r1, #24
; CHECK-NEXT:    orr r0, r1, r0, lsl #16
; CHECK-NEXT:    mov pc, lr
;
; CHECK-ARMv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-ARMv6:       @ %bb.0:
; CHECK-ARMv6-NEXT:    ldrb r1, [r0]
; CHECK-ARMv6-NEXT:    ldrb r0, [r0, #1]
; CHECK-ARMv6-NEXT:    lsl r1, r1, #24
; CHECK-ARMv6-NEXT:    orr r0, r1, r0, lsl #16
; CHECK-ARMv6-NEXT:    bx lr
;
; CHECK-THUMBv6-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-THUMBv6:       @ %bb.0:
; CHECK-THUMBv6-NEXT:    ldrb r1, [r0, #1]
; CHECK-THUMBv6-NEXT:    lsls r1, r1, #16
; CHECK-THUMBv6-NEXT:    ldrb r0, [r0]
; CHECK-THUMBv6-NEXT:    lsls r0, r0, #24
; CHECK-THUMBv6-NEXT:    adds r0, r0, r1
; CHECK-THUMBv6-NEXT:    bx lr
;
; CHECK-THUMBv7-LABEL: zext_load_i32_by_i8_bswap_shl_16:
; CHECK-THUMBv7:       @ %bb.0:
; CHECK-THUMBv7-NEXT:    ldrb r1, [r0]
; CHECK-THUMBv7-NEXT:    ldrb r0, [r0, #1]
; CHECK-THUMBv7-NEXT:    lsls r1, r1, #24
; CHECK-THUMBv7-NEXT:    orr.w r0, r1, r0, lsl #16
; CHECK-THUMBv7-NEXT:    bx lr

; NOTE(review): as in the shl_8 variant above, the CHECK lines show two
; separate ldrb loads on every tested target — the byte-swapped pair shifted
; left by 16 is not combined into a single wider load.
  %tmp = bitcast i32* %arg to i8*
  %tmp1 = getelementptr inbounds i8, i8* %tmp, i32 1  ; &p[1]
  %tmp2 = load i8, i8* %tmp1, align 1                 ; p[1]
  %tmp3 = zext i8 %tmp2 to i32
  %tmp30 = shl nuw nsw i32 %tmp3, 16                  ; p[1] << 16
  %tmp4 = getelementptr inbounds i8, i8* %tmp, i32 0  ; &p[0]
  %tmp5 = load i8, i8* %tmp4, align 2                 ; p[0] (2-byte aligned)
  %tmp6 = zext i8 %tmp5 to i32
  %tmp7 = shl nuw nsw i32 %tmp6, 24                   ; p[0] << 24
  %tmp8 = or i32 %tmp7, %tmp30                        ; (p[1]<<16) | (p[0]<<24)
  ret i32 %tmp8
}
1005